mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-02 04:20:28 +00:00
LibTimeZone+LibUnicode: Generate string data with run-length encoding
Currently, the unique string lists are stored in the initialized data sections of their shared libraries. In order to move the data to the read-only section, generate the strings using RLE arrays. We generate two arrays: the first is the RLE data itself, in which each string is stored as a two-byte length (high byte first) followed by the string's bytes; the second is a list of indices into the RLE array, one for each string, giving the offset at which that string's entry begins. We then generate a decoding method to convert an RLE string to a StringView.
This commit is contained in:
parent
de980de0e4
commit
becec3578f
Notes:
sideshowbarker
2024-07-17 08:11:40 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/becec3578f Pull-request: https://github.com/SerenityOS/serenity/pull/14876
6 changed files with 138 additions and 59 deletions
|
@ -724,8 +724,8 @@ Optional<Array<NamedOffset, 2>> get_named_time_zone_offsets(TimeZone time_zone,
|
|||
|
||||
auto format_name = [](auto format, auto offset) -> String {
|
||||
if (offset == 0)
|
||||
return s_string_list[format].replace("{}"sv, ""sv, ReplaceMode::FirstOnly);
|
||||
return String::formatted(s_string_list[format], s_string_list[offset]);
|
||||
return decode_string(format).replace("{}"sv, ""sv, ReplaceMode::FirstOnly);
|
||||
return String::formatted(decode_string(format), decode_string(offset));
|
||||
};
|
||||
|
||||
auto set_named_offset = [&](auto& named_offset, auto dst_offset, auto in_dst, auto format, auto offset) {
|
||||
|
@ -776,7 +776,7 @@ Vector<StringView> time_zones_in_region(StringView region)
|
|||
time_zones.ensure_capacity(regional_time_zones.size());
|
||||
|
||||
for (auto time_zone : regional_time_zones)
|
||||
time_zones.unchecked_append(s_string_list[time_zone]);
|
||||
time_zones.unchecked_append(decode_string(time_zone));
|
||||
|
||||
return time_zones;
|
||||
}
|
||||
|
|
|
@ -1811,10 +1811,10 @@ struct CalendarPatternImpl {
|
|||
CalendarPattern to_unicode_calendar_pattern() const {
|
||||
CalendarPattern calendar_pattern {};
|
||||
|
||||
calendar_pattern.skeleton = s_string_list[skeleton];
|
||||
calendar_pattern.pattern = s_string_list[pattern];
|
||||
calendar_pattern.skeleton = decode_string(skeleton);
|
||||
calendar_pattern.pattern = decode_string(pattern);
|
||||
if (pattern12 != 0)
|
||||
calendar_pattern.pattern12 = s_string_list[pattern12];
|
||||
calendar_pattern.pattern12 = decode_string(pattern12);
|
||||
|
||||
convert_calendar_fields(*this, calendar_pattern);
|
||||
return calendar_pattern;
|
||||
|
@ -1843,9 +1843,9 @@ struct CalendarRangePatternImpl {
|
|||
|
||||
if (field != -1)
|
||||
calendar_range_pattern.field = static_cast<CalendarRangePattern::Field>(field);
|
||||
calendar_range_pattern.start_range = s_string_list[start_range];
|
||||
calendar_range_pattern.separator = s_string_list[separator];
|
||||
calendar_range_pattern.end_range = s_string_list[end_range];
|
||||
calendar_range_pattern.start_range = decode_string(start_range);
|
||||
calendar_range_pattern.separator = decode_string(separator);
|
||||
calendar_range_pattern.end_range = decode_string(end_range);
|
||||
|
||||
convert_calendar_fields(*this, calendar_range_pattern);
|
||||
return calendar_range_pattern;
|
||||
|
@ -1929,12 +1929,12 @@ struct TimeZoneFormatImpl {
|
|||
TimeZoneFormat to_time_zone_format() const {
|
||||
TimeZoneFormat time_zone_format {};
|
||||
|
||||
time_zone_format.symbol_ahead_sign = s_string_list[symbol_ahead_sign];
|
||||
time_zone_format.symbol_ahead_separator = s_string_list[symbol_ahead_separator];
|
||||
time_zone_format.symbol_behind_sign = s_string_list[symbol_behind_sign];
|
||||
time_zone_format.symbol_behind_separator = s_string_list[symbol_behind_separator];
|
||||
time_zone_format.gmt_format = s_string_list[gmt_format];
|
||||
time_zone_format.gmt_zero_format = s_string_list[gmt_zero_format];
|
||||
time_zone_format.symbol_ahead_sign = decode_string(symbol_ahead_sign);
|
||||
time_zone_format.symbol_ahead_separator = decode_string(symbol_ahead_separator);
|
||||
time_zone_format.symbol_behind_sign = decode_string(symbol_behind_sign);
|
||||
time_zone_format.symbol_behind_separator = decode_string(symbol_behind_separator);
|
||||
time_zone_format.gmt_format = decode_string(gmt_format);
|
||||
time_zone_format.gmt_zero_format = decode_string(gmt_zero_format);
|
||||
|
||||
return time_zone_format;
|
||||
}
|
||||
|
@ -2200,7 +2200,7 @@ Vector<CalendarRangePattern> get_calendar_range_formats(StringView locale, Strin
|
|||
for (auto format : range_formats) {
|
||||
auto const& pattern = s_calendar_range_patterns[format];
|
||||
|
||||
if (skeleton == s_string_list[pattern.skeleton])
|
||||
if (skeleton == decode_string(pattern.skeleton))
|
||||
result.append(pattern.to_unicode_calendar_range_pattern());
|
||||
}
|
||||
}
|
||||
|
@ -2218,7 +2218,7 @@ Vector<CalendarRangePattern> get_calendar_range12_formats(StringView locale, Str
|
|||
for (auto format : range12_formats) {
|
||||
auto const& pattern = s_calendar_range_patterns[format];
|
||||
|
||||
if (skeleton == s_string_list[pattern.skeleton])
|
||||
if (skeleton == decode_string(pattern.skeleton))
|
||||
result.append(pattern.to_unicode_calendar_range_pattern());
|
||||
}
|
||||
}
|
||||
|
@ -2263,7 +2263,7 @@ Optional<StringView> get_calendar_era_symbol(StringView locale, StringView calen
|
|||
|
||||
if (auto value_index = to_underlying(value); value_index < symbols.size()) {
|
||||
if (auto symbol_index = symbols.at(value_index); symbol_index != 0)
|
||||
return s_string_list[symbol_index];
|
||||
return decode_string(symbol_index);
|
||||
}
|
||||
|
||||
return {};
|
||||
|
@ -2275,7 +2275,7 @@ Optional<StringView> get_calendar_month_symbol(StringView locale, StringView cal
|
|||
|
||||
if (auto value_index = to_underlying(value); value_index < symbols.size()) {
|
||||
if (auto symbol_index = symbols.at(value_index); symbol_index != 0)
|
||||
return s_string_list[symbol_index];
|
||||
return decode_string(symbol_index);
|
||||
}
|
||||
|
||||
return {};
|
||||
|
@ -2287,7 +2287,7 @@ Optional<StringView> get_calendar_weekday_symbol(StringView locale, StringView c
|
|||
|
||||
if (auto value_index = to_underlying(value); value_index < symbols.size()) {
|
||||
if (auto symbol_index = symbols.at(value_index); symbol_index != 0)
|
||||
return s_string_list[symbol_index];
|
||||
return decode_string(symbol_index);
|
||||
}
|
||||
|
||||
return {};
|
||||
|
@ -2299,7 +2299,7 @@ Optional<StringView> get_calendar_day_period_symbol(StringView locale, StringVie
|
|||
|
||||
if (auto value_index = to_underlying(value); value_index < symbols.size()) {
|
||||
if (auto symbol_index = symbols.at(value_index); symbol_index != 0)
|
||||
return s_string_list[symbol_index];
|
||||
return decode_string(symbol_index);
|
||||
}
|
||||
|
||||
return {};
|
||||
|
@ -2400,7 +2400,7 @@ Optional<StringView> get_time_zone_name(StringView locale, StringView time_zone,
|
|||
}
|
||||
|
||||
if (name_index != 0)
|
||||
return s_string_list[name_index];
|
||||
return decode_string(name_index);
|
||||
}
|
||||
|
||||
return {};
|
||||
|
|
|
@ -1117,8 +1117,8 @@ struct DisplayPatternImpl {
|
|||
DisplayPattern to_display_pattern() const
|
||||
{
|
||||
DisplayPattern display_patterns {};
|
||||
display_patterns.locale_pattern = s_string_list[locale_pattern];
|
||||
display_patterns.locale_separator = s_string_list[locale_separator];
|
||||
display_patterns.locale_pattern = decode_string(locale_pattern);
|
||||
display_patterns.locale_separator = decode_string(locale_separator);
|
||||
|
||||
return display_patterns;
|
||||
}
|
||||
|
@ -1266,13 +1266,13 @@ struct CanonicalLanguageID {
|
|||
LanguageID language_id {};
|
||||
language_id.variants.ensure_capacity(variants_size);
|
||||
|
||||
language_id.language = s_string_list[language];
|
||||
language_id.language = decode_string(language);
|
||||
if (script != 0)
|
||||
language_id.script = s_string_list[script];
|
||||
language_id.script = decode_string(script);
|
||||
if (region != 0)
|
||||
language_id.region = s_string_list[region];
|
||||
language_id.region = decode_string(region);
|
||||
for (size_t i = 0; i < variants_size; ++i)
|
||||
language_id.variants.append(s_string_list[variants[i]]);
|
||||
language_id.variants.append(decode_string(variants[i]));
|
||||
|
||||
return language_id;
|
||||
}
|
||||
|
@ -1284,7 +1284,7 @@ struct CanonicalLanguageID {
|
|||
return false;
|
||||
|
||||
for (size_t i = 0; i < variants_size; ++i) {
|
||||
if (s_string_list[variants[i]] != other_variants[i])
|
||||
if (decode_string(variants[i]) != other_variants[i])
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1415,9 +1415,9 @@ static LanguageMapping const* resolve_likely_subtag(LanguageID const& language_i
|
|||
}
|
||||
|
||||
for (auto const& map : s_likely_subtags) {
|
||||
auto const& key_language = s_string_list[map.key.language];
|
||||
auto const& key_script = s_string_list[map.key.script];
|
||||
auto const& key_region = s_string_list[map.key.region];
|
||||
auto const& key_language = decode_string(map.key.language);
|
||||
auto const& key_script = decode_string(map.key.script);
|
||||
auto const& key_region = decode_string(map.key.region);
|
||||
|
||||
if (key_language != search_key.language)
|
||||
continue;
|
||||
|
@ -1463,7 +1463,7 @@ Optional<StringView> get_locale_@enum_snake@_mapping(StringView locale, StringVi
|
|||
auto const& mappings = @unique_list@.at(mapping_index);
|
||||
|
||||
auto @enum_snake@_string_index = mappings.at(@enum_snake@_index);
|
||||
auto @enum_snake@_mapping = s_string_list.at(@enum_snake@_string_index);
|
||||
auto @enum_snake@_mapping = decode_string(@enum_snake@_string_index);
|
||||
|
||||
if (@enum_snake@_mapping.is_empty())
|
||||
return {};
|
||||
|
@ -1493,7 +1493,7 @@ Optional<StringView> get_locale_@enum_snake@_mapping(StringView locale, StringVi
|
|||
|
||||
ValueFromStringOptions options {};
|
||||
options.return_type = "StringView"sv;
|
||||
options.return_format = "s_string_list[{}]"sv;
|
||||
options.return_format = "decode_string({})"sv;
|
||||
|
||||
generate_value_from_string(generator, "resolve_{}_alias"sv, s_string_index_type, enum_snake, move(hashes), options);
|
||||
};
|
||||
|
@ -1606,7 +1606,7 @@ Optional<StringView> get_preferred_keyword_value_for_locale(StringView locale, S
|
|||
if (keyword_indices.is_empty())
|
||||
return {};
|
||||
|
||||
return s_string_list[keyword_indices[0]];
|
||||
return decode_string(keyword_indices[0]);
|
||||
}
|
||||
|
||||
Vector<StringView> get_keywords_for_locale(StringView locale, StringView key)
|
||||
|
@ -1636,7 +1636,7 @@ Vector<StringView> get_keywords_for_locale(StringView locale, StringView key)
|
|||
keywords.ensure_capacity(keyword_indices.size());
|
||||
|
||||
for (auto keyword : keyword_indices)
|
||||
keywords.unchecked_append(s_string_list[keyword]);
|
||||
keywords.unchecked_append(decode_string(keyword));
|
||||
|
||||
return keywords;
|
||||
}
|
||||
|
@ -1673,10 +1673,10 @@ Optional<ListPatterns> get_locale_list_patterns(StringView locale, StringView li
|
|||
auto const& list_patterns = s_list_patterns.at(list_patterns_index);
|
||||
|
||||
if ((list_patterns.type == type_value) && (list_patterns.style == list_pattern_style)) {
|
||||
auto const& start = s_string_list[list_patterns.start];
|
||||
auto const& middle = s_string_list[list_patterns.middle];
|
||||
auto const& end = s_string_list[list_patterns.end];
|
||||
auto const& pair = s_string_list[list_patterns.pair];
|
||||
auto const& start = decode_string(list_patterns.start);
|
||||
auto const& middle = decode_string(list_patterns.middle);
|
||||
auto const& end = decode_string(list_patterns.end);
|
||||
auto const& pair = decode_string(list_patterns.pair);
|
||||
|
||||
return ListPatterns { start, middle, end, pair };
|
||||
}
|
||||
|
@ -1707,9 +1707,9 @@ Optional<CharacterOrder> character_order_for_locale(StringView locale)
|
|||
void resolve_complex_language_aliases(LanguageID& language_id)
|
||||
{
|
||||
for (auto const& map : s_complex_alias) {
|
||||
auto const& key_language = s_string_list[map.key.language];
|
||||
auto const& key_script = s_string_list[map.key.script];
|
||||
auto const& key_region = s_string_list[map.key.region];
|
||||
auto const& key_language = decode_string(map.key.language);
|
||||
auto const& key_script = decode_string(map.key.script);
|
||||
auto const& key_region = decode_string(map.key.region);
|
||||
|
||||
if ((key_language != language_id.language) && (key_language != "und"sv))
|
||||
continue;
|
||||
|
@ -1745,12 +1745,12 @@ Optional<LanguageID> add_likely_subtags(LanguageID const& language_id)
|
|||
|
||||
auto maximized = language_id;
|
||||
|
||||
auto const& key_script = s_string_list[likely_subtag->key.script];
|
||||
auto const& key_region = s_string_list[likely_subtag->key.region];
|
||||
auto const& key_script = decode_string(likely_subtag->key.script);
|
||||
auto const& key_region = decode_string(likely_subtag->key.region);
|
||||
|
||||
auto const& alias_language = s_string_list[likely_subtag->alias.language];
|
||||
auto const& alias_script = s_string_list[likely_subtag->alias.script];
|
||||
auto const& alias_region = s_string_list[likely_subtag->alias.region];
|
||||
auto const& alias_language = decode_string(likely_subtag->alias.language);
|
||||
auto const& alias_script = decode_string(likely_subtag->alias.script);
|
||||
auto const& alias_region = decode_string(likely_subtag->alias.region);
|
||||
|
||||
if (maximized.language == "und"sv)
|
||||
maximized.language = alias_language;
|
||||
|
@ -1765,7 +1765,7 @@ Optional<LanguageID> add_likely_subtags(LanguageID const& language_id)
|
|||
Optional<String> resolve_most_likely_territory(LanguageID const& language_id)
|
||||
{
|
||||
if (auto const* likely_subtag = resolve_likely_subtag(language_id); likely_subtag != nullptr)
|
||||
return s_string_list[likely_subtag->alias.region];
|
||||
return decode_string(likely_subtag->alias.region);
|
||||
return {};
|
||||
}
|
||||
|
||||
|
|
|
@ -825,13 +825,13 @@ struct NumberFormatImpl {
|
|||
number_format.magnitude = magnitude;
|
||||
number_format.exponent = exponent;
|
||||
number_format.plurality = static_cast<PluralCategory>(plurality);
|
||||
number_format.zero_format = s_string_list[zero_format];
|
||||
number_format.positive_format = s_string_list[positive_format];
|
||||
number_format.negative_format = s_string_list[negative_format];
|
||||
number_format.zero_format = decode_string(zero_format);
|
||||
number_format.positive_format = decode_string(positive_format);
|
||||
number_format.negative_format = decode_string(negative_format);
|
||||
|
||||
number_format.identifiers.ensure_capacity(identifiers.size());
|
||||
for (@string_index_type@ identifier : identifiers)
|
||||
number_format.identifiers.append(s_string_list[identifier]);
|
||||
number_format.identifiers.append(decode_string(identifier));
|
||||
|
||||
return number_format;
|
||||
}
|
||||
|
@ -996,7 +996,7 @@ Optional<StringView> get_number_system_symbol(StringView locale, StringView syst
|
|||
if (symbol_index >= symbols.size())
|
||||
return {};
|
||||
|
||||
return s_string_list[symbols[symbol_index]];
|
||||
return decode_string(symbols[symbol_index]);
|
||||
}
|
||||
|
||||
return {};
|
||||
|
@ -1088,7 +1088,7 @@ static Unit const* find_units(StringView locale, StringView unit)
|
|||
for (auto unit_index : locale_units) {
|
||||
auto const& units = s_units.at(unit_index);
|
||||
|
||||
if (unit == s_string_list[units.unit])
|
||||
if (unit == decode_string(units.unit))
|
||||
return &units;
|
||||
};
|
||||
|
||||
|
|
|
@ -218,7 +218,7 @@ struct RelativeTimeFormatImpl {
|
|||
{
|
||||
RelativeTimeFormat relative_time_format {};
|
||||
relative_time_format.plurality = plurality;
|
||||
relative_time_format.pattern = s_string_list[pattern];
|
||||
relative_time_format.pattern = decode_string(pattern);
|
||||
|
||||
return relative_time_format;
|
||||
}
|
||||
|
@ -271,7 +271,7 @@ Vector<RelativeTimeFormat> get_relative_time_format_patterns(StringView locale,
|
|||
continue;
|
||||
if (locale_format.style != style)
|
||||
continue;
|
||||
if (s_string_list[locale_format.tense_or_number] != tense_or_number)
|
||||
if (decode_string(locale_format.tense_or_number) != tense_or_number)
|
||||
continue;
|
||||
|
||||
formats.append(locale_format.to_relative_time_format());
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <AK/HashMap.h>
|
||||
#include <AK/JsonValue.h>
|
||||
#include <AK/LexicalPath.h>
|
||||
#include <AK/NumericLimits.h>
|
||||
#include <AK/Optional.h>
|
||||
#include <AK/QuickSort.h>
|
||||
#include <AK/SourceGenerator.h>
|
||||
|
@ -174,7 +175,7 @@ static constexpr Array<Span<@type@ const>, @size@ + 1> @name@ { {
|
|||
|
||||
// clang-format off
|
||||
// clang-format gets confused by the requires() clauses above, and formats this section very weirdly.
|
||||
private:
|
||||
protected:
|
||||
Vector<StorageType> m_storage;
|
||||
HashMap<StorageType, IndexType> m_storage_indices;
|
||||
// clang-format on
|
||||
|
@ -185,9 +186,87 @@ class UniqueStringStorage : public UniqueStorage<String, StringIndexType> {
|
|||
using Base = UniqueStorage<String, StringIndexType>;
|
||||
|
||||
public:
|
||||
// The goal of the string table generator is to ensure the table is located within the read-only
|
||||
// section of the shared library. If StringViews are generated directly, the table will be located
|
||||
// in the initialized data section. So instead, we generate run-length encoded (RLE) arrays to
|
||||
// represent the strings.
|
||||
void generate(SourceGenerator& generator)
|
||||
{
|
||||
Base::generate(generator, "StringView"sv, "s_string_list"sv, 40);
|
||||
constexpr size_t max_values_per_row = 300;
|
||||
size_t values_in_current_row = 0;
|
||||
|
||||
auto append_hex_value = [&](auto value) {
|
||||
if (values_in_current_row++ > 0)
|
||||
generator.append(", ");
|
||||
|
||||
generator.append(String::formatted("{:#x}", value));
|
||||
|
||||
if (values_in_current_row == max_values_per_row) {
|
||||
values_in_current_row = 0;
|
||||
generator.append(",\n ");
|
||||
}
|
||||
};
|
||||
|
||||
Vector<u32> string_indices;
|
||||
string_indices.ensure_capacity(Base::m_storage.size());
|
||||
u32 next_index { 0 };
|
||||
|
||||
for (auto const& string : Base::m_storage) {
|
||||
// Ensure the string length may be encoded as two u8s.
|
||||
VERIFY(string.length() <= NumericLimits<u16>::max());
|
||||
|
||||
string_indices.unchecked_append(next_index);
|
||||
next_index += string.length() + 2;
|
||||
}
|
||||
|
||||
generator.set("size", String::number(next_index));
|
||||
generator.append(R"~~~(
|
||||
static constexpr Array<u8, @size@> s_encoded_strings { {
|
||||
)~~~");
|
||||
|
||||
for (auto const& string : Base::m_storage) {
|
||||
auto length = string.length();
|
||||
append_hex_value((length & 0xff00) >> 8);
|
||||
append_hex_value(length & 0x00ff);
|
||||
|
||||
for (auto ch : string)
|
||||
append_hex_value(static_cast<u8>(ch));
|
||||
}
|
||||
|
||||
generator.append(R"~~~(
|
||||
} };
|
||||
)~~~");
|
||||
|
||||
generator.set("size", String::number(string_indices.size()));
|
||||
generator.append(R"~~~(
|
||||
static constexpr Array<u32, @size@> s_encoded_string_indices { {
|
||||
)~~~");
|
||||
|
||||
values_in_current_row = 0;
|
||||
for (auto index : string_indices)
|
||||
append_hex_value(index);
|
||||
|
||||
generator.append(R"~~~(
|
||||
} };
|
||||
|
||||
static constexpr StringView decode_string(size_t index)
|
||||
{
|
||||
if (index == 0)
|
||||
return {};
|
||||
|
||||
index = s_encoded_string_indices[index - 1];
|
||||
|
||||
auto length_high = s_encoded_strings[index];
|
||||
auto length_low = s_encoded_strings[index + 1];
|
||||
|
||||
size_t length = (length_high << 8) | length_low;
|
||||
if (length == 0)
|
||||
return {};
|
||||
|
||||
auto const* start = &s_encoded_strings[index + 2];
|
||||
return { reinterpret_cast<char const*>(start), length };
|
||||
}
|
||||
)~~~");
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue