LibTimeZone+LibUnicode: Generate string data with run-length encoding
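Currently, the unique string lists are stored in the initialized data sections of their shared libraries. In order to move the data to the read-only section, generate the strings using RLE arrays. We generate two arrays: the first is the RLE data itself, the second is a list of indices into the RLE array for each string. Each string in the first array is stored as a two-byte length (high byte first) followed by the string's bytes, so the scheme is, strictly speaking, a length-prefixed encoding rather than classic run-length encoding. We then generate a decoding method to convert an RLE string to a StringView.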
This commit is contained in:
parent
de980de0e4
commit
becec3578f
Notes:
sideshowbarker
2024-07-17 08:11:40 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/becec3578f Pull-request: https://github.com/SerenityOS/serenity/pull/14876
6 changed files with 138 additions and 59 deletions
|
@ -724,8 +724,8 @@ Optional<Array<NamedOffset, 2>> get_named_time_zone_offsets(TimeZone time_zone,
|
||||||
|
|
||||||
auto format_name = [](auto format, auto offset) -> String {
|
auto format_name = [](auto format, auto offset) -> String {
|
||||||
if (offset == 0)
|
if (offset == 0)
|
||||||
return s_string_list[format].replace("{}"sv, ""sv, ReplaceMode::FirstOnly);
|
return decode_string(format).replace("{}"sv, ""sv, ReplaceMode::FirstOnly);
|
||||||
return String::formatted(s_string_list[format], s_string_list[offset]);
|
return String::formatted(decode_string(format), decode_string(offset));
|
||||||
};
|
};
|
||||||
|
|
||||||
auto set_named_offset = [&](auto& named_offset, auto dst_offset, auto in_dst, auto format, auto offset) {
|
auto set_named_offset = [&](auto& named_offset, auto dst_offset, auto in_dst, auto format, auto offset) {
|
||||||
|
@ -776,7 +776,7 @@ Vector<StringView> time_zones_in_region(StringView region)
|
||||||
time_zones.ensure_capacity(regional_time_zones.size());
|
time_zones.ensure_capacity(regional_time_zones.size());
|
||||||
|
|
||||||
for (auto time_zone : regional_time_zones)
|
for (auto time_zone : regional_time_zones)
|
||||||
time_zones.unchecked_append(s_string_list[time_zone]);
|
time_zones.unchecked_append(decode_string(time_zone));
|
||||||
|
|
||||||
return time_zones;
|
return time_zones;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1811,10 +1811,10 @@ struct CalendarPatternImpl {
|
||||||
CalendarPattern to_unicode_calendar_pattern() const {
|
CalendarPattern to_unicode_calendar_pattern() const {
|
||||||
CalendarPattern calendar_pattern {};
|
CalendarPattern calendar_pattern {};
|
||||||
|
|
||||||
calendar_pattern.skeleton = s_string_list[skeleton];
|
calendar_pattern.skeleton = decode_string(skeleton);
|
||||||
calendar_pattern.pattern = s_string_list[pattern];
|
calendar_pattern.pattern = decode_string(pattern);
|
||||||
if (pattern12 != 0)
|
if (pattern12 != 0)
|
||||||
calendar_pattern.pattern12 = s_string_list[pattern12];
|
calendar_pattern.pattern12 = decode_string(pattern12);
|
||||||
|
|
||||||
convert_calendar_fields(*this, calendar_pattern);
|
convert_calendar_fields(*this, calendar_pattern);
|
||||||
return calendar_pattern;
|
return calendar_pattern;
|
||||||
|
@ -1843,9 +1843,9 @@ struct CalendarRangePatternImpl {
|
||||||
|
|
||||||
if (field != -1)
|
if (field != -1)
|
||||||
calendar_range_pattern.field = static_cast<CalendarRangePattern::Field>(field);
|
calendar_range_pattern.field = static_cast<CalendarRangePattern::Field>(field);
|
||||||
calendar_range_pattern.start_range = s_string_list[start_range];
|
calendar_range_pattern.start_range = decode_string(start_range);
|
||||||
calendar_range_pattern.separator = s_string_list[separator];
|
calendar_range_pattern.separator = decode_string(separator);
|
||||||
calendar_range_pattern.end_range = s_string_list[end_range];
|
calendar_range_pattern.end_range = decode_string(end_range);
|
||||||
|
|
||||||
convert_calendar_fields(*this, calendar_range_pattern);
|
convert_calendar_fields(*this, calendar_range_pattern);
|
||||||
return calendar_range_pattern;
|
return calendar_range_pattern;
|
||||||
|
@ -1929,12 +1929,12 @@ struct TimeZoneFormatImpl {
|
||||||
TimeZoneFormat to_time_zone_format() const {
|
TimeZoneFormat to_time_zone_format() const {
|
||||||
TimeZoneFormat time_zone_format {};
|
TimeZoneFormat time_zone_format {};
|
||||||
|
|
||||||
time_zone_format.symbol_ahead_sign = s_string_list[symbol_ahead_sign];
|
time_zone_format.symbol_ahead_sign = decode_string(symbol_ahead_sign);
|
||||||
time_zone_format.symbol_ahead_separator = s_string_list[symbol_ahead_separator];
|
time_zone_format.symbol_ahead_separator = decode_string(symbol_ahead_separator);
|
||||||
time_zone_format.symbol_behind_sign = s_string_list[symbol_behind_sign];
|
time_zone_format.symbol_behind_sign = decode_string(symbol_behind_sign);
|
||||||
time_zone_format.symbol_behind_separator = s_string_list[symbol_behind_separator];
|
time_zone_format.symbol_behind_separator = decode_string(symbol_behind_separator);
|
||||||
time_zone_format.gmt_format = s_string_list[gmt_format];
|
time_zone_format.gmt_format = decode_string(gmt_format);
|
||||||
time_zone_format.gmt_zero_format = s_string_list[gmt_zero_format];
|
time_zone_format.gmt_zero_format = decode_string(gmt_zero_format);
|
||||||
|
|
||||||
return time_zone_format;
|
return time_zone_format;
|
||||||
}
|
}
|
||||||
|
@ -2200,7 +2200,7 @@ Vector<CalendarRangePattern> get_calendar_range_formats(StringView locale, Strin
|
||||||
for (auto format : range_formats) {
|
for (auto format : range_formats) {
|
||||||
auto const& pattern = s_calendar_range_patterns[format];
|
auto const& pattern = s_calendar_range_patterns[format];
|
||||||
|
|
||||||
if (skeleton == s_string_list[pattern.skeleton])
|
if (skeleton == decode_string(pattern.skeleton))
|
||||||
result.append(pattern.to_unicode_calendar_range_pattern());
|
result.append(pattern.to_unicode_calendar_range_pattern());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2218,7 +2218,7 @@ Vector<CalendarRangePattern> get_calendar_range12_formats(StringView locale, Str
|
||||||
for (auto format : range12_formats) {
|
for (auto format : range12_formats) {
|
||||||
auto const& pattern = s_calendar_range_patterns[format];
|
auto const& pattern = s_calendar_range_patterns[format];
|
||||||
|
|
||||||
if (skeleton == s_string_list[pattern.skeleton])
|
if (skeleton == decode_string(pattern.skeleton))
|
||||||
result.append(pattern.to_unicode_calendar_range_pattern());
|
result.append(pattern.to_unicode_calendar_range_pattern());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2263,7 +2263,7 @@ Optional<StringView> get_calendar_era_symbol(StringView locale, StringView calen
|
||||||
|
|
||||||
if (auto value_index = to_underlying(value); value_index < symbols.size()) {
|
if (auto value_index = to_underlying(value); value_index < symbols.size()) {
|
||||||
if (auto symbol_index = symbols.at(value_index); symbol_index != 0)
|
if (auto symbol_index = symbols.at(value_index); symbol_index != 0)
|
||||||
return s_string_list[symbol_index];
|
return decode_string(symbol_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
|
@ -2275,7 +2275,7 @@ Optional<StringView> get_calendar_month_symbol(StringView locale, StringView cal
|
||||||
|
|
||||||
if (auto value_index = to_underlying(value); value_index < symbols.size()) {
|
if (auto value_index = to_underlying(value); value_index < symbols.size()) {
|
||||||
if (auto symbol_index = symbols.at(value_index); symbol_index != 0)
|
if (auto symbol_index = symbols.at(value_index); symbol_index != 0)
|
||||||
return s_string_list[symbol_index];
|
return decode_string(symbol_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
|
@ -2287,7 +2287,7 @@ Optional<StringView> get_calendar_weekday_symbol(StringView locale, StringView c
|
||||||
|
|
||||||
if (auto value_index = to_underlying(value); value_index < symbols.size()) {
|
if (auto value_index = to_underlying(value); value_index < symbols.size()) {
|
||||||
if (auto symbol_index = symbols.at(value_index); symbol_index != 0)
|
if (auto symbol_index = symbols.at(value_index); symbol_index != 0)
|
||||||
return s_string_list[symbol_index];
|
return decode_string(symbol_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
|
@ -2299,7 +2299,7 @@ Optional<StringView> get_calendar_day_period_symbol(StringView locale, StringVie
|
||||||
|
|
||||||
if (auto value_index = to_underlying(value); value_index < symbols.size()) {
|
if (auto value_index = to_underlying(value); value_index < symbols.size()) {
|
||||||
if (auto symbol_index = symbols.at(value_index); symbol_index != 0)
|
if (auto symbol_index = symbols.at(value_index); symbol_index != 0)
|
||||||
return s_string_list[symbol_index];
|
return decode_string(symbol_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
|
@ -2400,7 +2400,7 @@ Optional<StringView> get_time_zone_name(StringView locale, StringView time_zone,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (name_index != 0)
|
if (name_index != 0)
|
||||||
return s_string_list[name_index];
|
return decode_string(name_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
|
|
|
@ -1117,8 +1117,8 @@ struct DisplayPatternImpl {
|
||||||
DisplayPattern to_display_pattern() const
|
DisplayPattern to_display_pattern() const
|
||||||
{
|
{
|
||||||
DisplayPattern display_patterns {};
|
DisplayPattern display_patterns {};
|
||||||
display_patterns.locale_pattern = s_string_list[locale_pattern];
|
display_patterns.locale_pattern = decode_string(locale_pattern);
|
||||||
display_patterns.locale_separator = s_string_list[locale_separator];
|
display_patterns.locale_separator = decode_string(locale_separator);
|
||||||
|
|
||||||
return display_patterns;
|
return display_patterns;
|
||||||
}
|
}
|
||||||
|
@ -1266,13 +1266,13 @@ struct CanonicalLanguageID {
|
||||||
LanguageID language_id {};
|
LanguageID language_id {};
|
||||||
language_id.variants.ensure_capacity(variants_size);
|
language_id.variants.ensure_capacity(variants_size);
|
||||||
|
|
||||||
language_id.language = s_string_list[language];
|
language_id.language = decode_string(language);
|
||||||
if (script != 0)
|
if (script != 0)
|
||||||
language_id.script = s_string_list[script];
|
language_id.script = decode_string(script);
|
||||||
if (region != 0)
|
if (region != 0)
|
||||||
language_id.region = s_string_list[region];
|
language_id.region = decode_string(region);
|
||||||
for (size_t i = 0; i < variants_size; ++i)
|
for (size_t i = 0; i < variants_size; ++i)
|
||||||
language_id.variants.append(s_string_list[variants[i]]);
|
language_id.variants.append(decode_string(variants[i]));
|
||||||
|
|
||||||
return language_id;
|
return language_id;
|
||||||
}
|
}
|
||||||
|
@ -1284,7 +1284,7 @@ struct CanonicalLanguageID {
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
for (size_t i = 0; i < variants_size; ++i) {
|
for (size_t i = 0; i < variants_size; ++i) {
|
||||||
if (s_string_list[variants[i]] != other_variants[i])
|
if (decode_string(variants[i]) != other_variants[i])
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1415,9 +1415,9 @@ static LanguageMapping const* resolve_likely_subtag(LanguageID const& language_i
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto const& map : s_likely_subtags) {
|
for (auto const& map : s_likely_subtags) {
|
||||||
auto const& key_language = s_string_list[map.key.language];
|
auto const& key_language = decode_string(map.key.language);
|
||||||
auto const& key_script = s_string_list[map.key.script];
|
auto const& key_script = decode_string(map.key.script);
|
||||||
auto const& key_region = s_string_list[map.key.region];
|
auto const& key_region = decode_string(map.key.region);
|
||||||
|
|
||||||
if (key_language != search_key.language)
|
if (key_language != search_key.language)
|
||||||
continue;
|
continue;
|
||||||
|
@ -1463,7 +1463,7 @@ Optional<StringView> get_locale_@enum_snake@_mapping(StringView locale, StringVi
|
||||||
auto const& mappings = @unique_list@.at(mapping_index);
|
auto const& mappings = @unique_list@.at(mapping_index);
|
||||||
|
|
||||||
auto @enum_snake@_string_index = mappings.at(@enum_snake@_index);
|
auto @enum_snake@_string_index = mappings.at(@enum_snake@_index);
|
||||||
auto @enum_snake@_mapping = s_string_list.at(@enum_snake@_string_index);
|
auto @enum_snake@_mapping = decode_string(@enum_snake@_string_index);
|
||||||
|
|
||||||
if (@enum_snake@_mapping.is_empty())
|
if (@enum_snake@_mapping.is_empty())
|
||||||
return {};
|
return {};
|
||||||
|
@ -1493,7 +1493,7 @@ Optional<StringView> get_locale_@enum_snake@_mapping(StringView locale, StringVi
|
||||||
|
|
||||||
ValueFromStringOptions options {};
|
ValueFromStringOptions options {};
|
||||||
options.return_type = "StringView"sv;
|
options.return_type = "StringView"sv;
|
||||||
options.return_format = "s_string_list[{}]"sv;
|
options.return_format = "decode_string({})"sv;
|
||||||
|
|
||||||
generate_value_from_string(generator, "resolve_{}_alias"sv, s_string_index_type, enum_snake, move(hashes), options);
|
generate_value_from_string(generator, "resolve_{}_alias"sv, s_string_index_type, enum_snake, move(hashes), options);
|
||||||
};
|
};
|
||||||
|
@ -1606,7 +1606,7 @@ Optional<StringView> get_preferred_keyword_value_for_locale(StringView locale, S
|
||||||
if (keyword_indices.is_empty())
|
if (keyword_indices.is_empty())
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
return s_string_list[keyword_indices[0]];
|
return decode_string(keyword_indices[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
Vector<StringView> get_keywords_for_locale(StringView locale, StringView key)
|
Vector<StringView> get_keywords_for_locale(StringView locale, StringView key)
|
||||||
|
@ -1636,7 +1636,7 @@ Vector<StringView> get_keywords_for_locale(StringView locale, StringView key)
|
||||||
keywords.ensure_capacity(keyword_indices.size());
|
keywords.ensure_capacity(keyword_indices.size());
|
||||||
|
|
||||||
for (auto keyword : keyword_indices)
|
for (auto keyword : keyword_indices)
|
||||||
keywords.unchecked_append(s_string_list[keyword]);
|
keywords.unchecked_append(decode_string(keyword));
|
||||||
|
|
||||||
return keywords;
|
return keywords;
|
||||||
}
|
}
|
||||||
|
@ -1673,10 +1673,10 @@ Optional<ListPatterns> get_locale_list_patterns(StringView locale, StringView li
|
||||||
auto const& list_patterns = s_list_patterns.at(list_patterns_index);
|
auto const& list_patterns = s_list_patterns.at(list_patterns_index);
|
||||||
|
|
||||||
if ((list_patterns.type == type_value) && (list_patterns.style == list_pattern_style)) {
|
if ((list_patterns.type == type_value) && (list_patterns.style == list_pattern_style)) {
|
||||||
auto const& start = s_string_list[list_patterns.start];
|
auto const& start = decode_string(list_patterns.start);
|
||||||
auto const& middle = s_string_list[list_patterns.middle];
|
auto const& middle = decode_string(list_patterns.middle);
|
||||||
auto const& end = s_string_list[list_patterns.end];
|
auto const& end = decode_string(list_patterns.end);
|
||||||
auto const& pair = s_string_list[list_patterns.pair];
|
auto const& pair = decode_string(list_patterns.pair);
|
||||||
|
|
||||||
return ListPatterns { start, middle, end, pair };
|
return ListPatterns { start, middle, end, pair };
|
||||||
}
|
}
|
||||||
|
@ -1707,9 +1707,9 @@ Optional<CharacterOrder> character_order_for_locale(StringView locale)
|
||||||
void resolve_complex_language_aliases(LanguageID& language_id)
|
void resolve_complex_language_aliases(LanguageID& language_id)
|
||||||
{
|
{
|
||||||
for (auto const& map : s_complex_alias) {
|
for (auto const& map : s_complex_alias) {
|
||||||
auto const& key_language = s_string_list[map.key.language];
|
auto const& key_language = decode_string(map.key.language);
|
||||||
auto const& key_script = s_string_list[map.key.script];
|
auto const& key_script = decode_string(map.key.script);
|
||||||
auto const& key_region = s_string_list[map.key.region];
|
auto const& key_region = decode_string(map.key.region);
|
||||||
|
|
||||||
if ((key_language != language_id.language) && (key_language != "und"sv))
|
if ((key_language != language_id.language) && (key_language != "und"sv))
|
||||||
continue;
|
continue;
|
||||||
|
@ -1745,12 +1745,12 @@ Optional<LanguageID> add_likely_subtags(LanguageID const& language_id)
|
||||||
|
|
||||||
auto maximized = language_id;
|
auto maximized = language_id;
|
||||||
|
|
||||||
auto const& key_script = s_string_list[likely_subtag->key.script];
|
auto const& key_script = decode_string(likely_subtag->key.script);
|
||||||
auto const& key_region = s_string_list[likely_subtag->key.region];
|
auto const& key_region = decode_string(likely_subtag->key.region);
|
||||||
|
|
||||||
auto const& alias_language = s_string_list[likely_subtag->alias.language];
|
auto const& alias_language = decode_string(likely_subtag->alias.language);
|
||||||
auto const& alias_script = s_string_list[likely_subtag->alias.script];
|
auto const& alias_script = decode_string(likely_subtag->alias.script);
|
||||||
auto const& alias_region = s_string_list[likely_subtag->alias.region];
|
auto const& alias_region = decode_string(likely_subtag->alias.region);
|
||||||
|
|
||||||
if (maximized.language == "und"sv)
|
if (maximized.language == "und"sv)
|
||||||
maximized.language = alias_language;
|
maximized.language = alias_language;
|
||||||
|
@ -1765,7 +1765,7 @@ Optional<LanguageID> add_likely_subtags(LanguageID const& language_id)
|
||||||
Optional<String> resolve_most_likely_territory(LanguageID const& language_id)
|
Optional<String> resolve_most_likely_territory(LanguageID const& language_id)
|
||||||
{
|
{
|
||||||
if (auto const* likely_subtag = resolve_likely_subtag(language_id); likely_subtag != nullptr)
|
if (auto const* likely_subtag = resolve_likely_subtag(language_id); likely_subtag != nullptr)
|
||||||
return s_string_list[likely_subtag->alias.region];
|
return decode_string(likely_subtag->alias.region);
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -825,13 +825,13 @@ struct NumberFormatImpl {
|
||||||
number_format.magnitude = magnitude;
|
number_format.magnitude = magnitude;
|
||||||
number_format.exponent = exponent;
|
number_format.exponent = exponent;
|
||||||
number_format.plurality = static_cast<PluralCategory>(plurality);
|
number_format.plurality = static_cast<PluralCategory>(plurality);
|
||||||
number_format.zero_format = s_string_list[zero_format];
|
number_format.zero_format = decode_string(zero_format);
|
||||||
number_format.positive_format = s_string_list[positive_format];
|
number_format.positive_format = decode_string(positive_format);
|
||||||
number_format.negative_format = s_string_list[negative_format];
|
number_format.negative_format = decode_string(negative_format);
|
||||||
|
|
||||||
number_format.identifiers.ensure_capacity(identifiers.size());
|
number_format.identifiers.ensure_capacity(identifiers.size());
|
||||||
for (@string_index_type@ identifier : identifiers)
|
for (@string_index_type@ identifier : identifiers)
|
||||||
number_format.identifiers.append(s_string_list[identifier]);
|
number_format.identifiers.append(decode_string(identifier));
|
||||||
|
|
||||||
return number_format;
|
return number_format;
|
||||||
}
|
}
|
||||||
|
@ -996,7 +996,7 @@ Optional<StringView> get_number_system_symbol(StringView locale, StringView syst
|
||||||
if (symbol_index >= symbols.size())
|
if (symbol_index >= symbols.size())
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
return s_string_list[symbols[symbol_index]];
|
return decode_string(symbols[symbol_index]);
|
||||||
}
|
}
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
|
@ -1088,7 +1088,7 @@ static Unit const* find_units(StringView locale, StringView unit)
|
||||||
for (auto unit_index : locale_units) {
|
for (auto unit_index : locale_units) {
|
||||||
auto const& units = s_units.at(unit_index);
|
auto const& units = s_units.at(unit_index);
|
||||||
|
|
||||||
if (unit == s_string_list[units.unit])
|
if (unit == decode_string(units.unit))
|
||||||
return &units;
|
return &units;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -218,7 +218,7 @@ struct RelativeTimeFormatImpl {
|
||||||
{
|
{
|
||||||
RelativeTimeFormat relative_time_format {};
|
RelativeTimeFormat relative_time_format {};
|
||||||
relative_time_format.plurality = plurality;
|
relative_time_format.plurality = plurality;
|
||||||
relative_time_format.pattern = s_string_list[pattern];
|
relative_time_format.pattern = decode_string(pattern);
|
||||||
|
|
||||||
return relative_time_format;
|
return relative_time_format;
|
||||||
}
|
}
|
||||||
|
@ -271,7 +271,7 @@ Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale,
|
||||||
continue;
|
continue;
|
||||||
if (locale_format.style != style)
|
if (locale_format.style != style)
|
||||||
continue;
|
continue;
|
||||||
if (s_string_list[locale_format.tense_or_number] != tense_or_number)
|
if (decode_string(locale_format.tense_or_number) != tense_or_number)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
formats.append(locale_format.to_relative_time_format());
|
formats.append(locale_format.to_relative_time_format());
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include <AK/HashMap.h>
|
#include <AK/HashMap.h>
|
||||||
#include <AK/JsonValue.h>
|
#include <AK/JsonValue.h>
|
||||||
#include <AK/LexicalPath.h>
|
#include <AK/LexicalPath.h>
|
||||||
|
#include <AK/NumericLimits.h>
|
||||||
#include <AK/Optional.h>
|
#include <AK/Optional.h>
|
||||||
#include <AK/QuickSort.h>
|
#include <AK/QuickSort.h>
|
||||||
#include <AK/SourceGenerator.h>
|
#include <AK/SourceGenerator.h>
|
||||||
|
@ -174,7 +175,7 @@ static constexpr Array<Span<@type@ const>, @size@ + 1> @name@ { {
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
// clang-format gets confused by the requires() clauses above, and formats this section very weirdly.
|
// clang-format gets confused by the requires() clauses above, and formats this section very weirdly.
|
||||||
private:
|
protected:
|
||||||
Vector<StorageType> m_storage;
|
Vector<StorageType> m_storage;
|
||||||
HashMap<StorageType, IndexType> m_storage_indices;
|
HashMap<StorageType, IndexType> m_storage_indices;
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
@ -185,9 +186,87 @@ class UniqueStringStorage : public UniqueStorage<String, StringIndexType> {
|
||||||
using Base = UniqueStorage<String, StringIndexType>;
|
using Base = UniqueStorage<String, StringIndexType>;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
// The goal of the string table generator is to ensure the table is located within the read-only
|
||||||
|
// section of the shared library. If StringViews are generated directly, the table will be located
|
||||||
|
// in the initialized data section. So instead, we generate run-length encoded (RLE) arrays to
|
||||||
|
// represent the strings.
|
||||||
void generate(SourceGenerator& generator)
|
void generate(SourceGenerator& generator)
|
||||||
{
|
{
|
||||||
Base::generate(generator, "StringView"sv, "s_string_list"sv, 40);
|
constexpr size_t max_values_per_row = 300;
|
||||||
|
size_t values_in_current_row = 0;
|
||||||
|
|
||||||
|
auto append_hex_value = [&](auto value) {
|
||||||
|
if (values_in_current_row++ > 0)
|
||||||
|
generator.append(", ");
|
||||||
|
|
||||||
|
generator.append(String::formatted("{:#x}", value));
|
||||||
|
|
||||||
|
if (values_in_current_row == max_values_per_row) {
|
||||||
|
values_in_current_row = 0;
|
||||||
|
generator.append(",\n ");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Vector<u32> string_indices;
|
||||||
|
string_indices.ensure_capacity(Base::m_storage.size());
|
||||||
|
u32 next_index { 0 };
|
||||||
|
|
||||||
|
for (auto const& string : Base::m_storage) {
|
||||||
|
// Ensure the string length may be encoded as two u8s.
|
||||||
|
VERIFY(string.length() <= NumericLimits<u16>::max());
|
||||||
|
|
||||||
|
string_indices.unchecked_append(next_index);
|
||||||
|
next_index += string.length() + 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
generator.set("size", String::number(next_index));
|
||||||
|
generator.append(R"~~~(
|
||||||
|
static constexpr Array<u8, @size@> s_encoded_strings { {
|
||||||
|
)~~~");
|
||||||
|
|
||||||
|
for (auto const& string : Base::m_storage) {
|
||||||
|
auto length = string.length();
|
||||||
|
append_hex_value((length & 0xff00) >> 8);
|
||||||
|
append_hex_value(length & 0x00ff);
|
||||||
|
|
||||||
|
for (auto ch : string)
|
||||||
|
append_hex_value(static_cast<u8>(ch));
|
||||||
|
}
|
||||||
|
|
||||||
|
generator.append(R"~~~(
|
||||||
|
} };
|
||||||
|
)~~~");
|
||||||
|
|
||||||
|
generator.set("size", String::number(string_indices.size()));
|
||||||
|
generator.append(R"~~~(
|
||||||
|
static constexpr Array<u32, @size@> s_encoded_string_indices { {
|
||||||
|
)~~~");
|
||||||
|
|
||||||
|
values_in_current_row = 0;
|
||||||
|
for (auto index : string_indices)
|
||||||
|
append_hex_value(index);
|
||||||
|
|
||||||
|
generator.append(R"~~~(
|
||||||
|
} };
|
||||||
|
|
||||||
|
static constexpr StringView decode_string(size_t index)
|
||||||
|
{
|
||||||
|
if (index == 0)
|
||||||
|
return {};
|
||||||
|
|
||||||
|
index = s_encoded_string_indices[index - 1];
|
||||||
|
|
||||||
|
auto length_high = s_encoded_strings[index];
|
||||||
|
auto length_low = s_encoded_strings[index + 1];
|
||||||
|
|
||||||
|
size_t length = (length_high << 8) | length_low;
|
||||||
|
if (length == 0)
|
||||||
|
return {};
|
||||||
|
|
||||||
|
auto const* start = &s_encoded_strings[index + 2];
|
||||||
|
return { reinterpret_cast<char const*>(start), length };
|
||||||
|
}
|
||||||
|
)~~~");
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue