mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 23:50:19 +00:00
Meta+LibUnicode: Avoid relocations for static unicode data
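Previously, each entry in s_decomposition_mappings held a Span pointing into s_decomposition_mappings_data, which caused thousands of avoidable relocations at load time. The entries now store an index and a count into s_decomposition_mappings_data instead, and the Span is constructed on lookup. This saves about 128kB of RAM for each process which uses LibUnicode.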
This commit is contained in:
parent
fb71df5cb1
commit
2d3567ee92
Notes:
sideshowbarker
2024-07-17 04:44:10 +09:00
Author: https://github.com/gunnarbeutner Commit: https://github.com/SerenityOS/serenity/commit/2d3567ee92 Pull-request: https://github.com/SerenityOS/serenity/pull/15953 Reviewed-by: https://github.com/trflynn89 ✅
3 changed files with 26 additions and 12 deletions
|
@ -768,6 +768,13 @@ struct SpecialCasing {
|
|||
Condition condition { Condition::None };
|
||||
};
|
||||
|
||||
struct CodePointDecompositionRaw {
|
||||
u32 code_point { 0 };
|
||||
CompatibilityFormattingTag tag { CompatibilityFormattingTag::Canonical };
|
||||
size_t decomposition_index { 0 };
|
||||
size_t decomposition_count { 0 };
|
||||
};
|
||||
|
||||
struct CodePointDecomposition {
|
||||
u32 code_point { 0 };
|
||||
CompatibilityFormattingTag tag { CompatibilityFormattingTag::Canonical };
|
||||
|
@ -947,7 +954,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
|
|||
generator.set("tag", mapping->tag);
|
||||
generator.set("start", String::number(mapping->decomposition_index));
|
||||
generator.set("size", String::number(mapping->decomposition_size));
|
||||
generator.append(", CompatibilityFormattingTag::@tag@, Span<u32 const> { s_decomposition_mappings_data.data() + @start@, @size@ } },");
|
||||
generator.append(", CompatibilityFormattingTag::@tag@, @start@, @size@ },");
|
||||
} else {
|
||||
append_list_and_size(data.special_casing_indices, "&s_special_casing[{}]"sv);
|
||||
generator.append(" },");
|
||||
|
@ -974,7 +981,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
|
|||
append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; });
|
||||
append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; });
|
||||
|
||||
append_code_point_mappings("decomposition"sv, "CodePointDecomposition"sv, unicode_data.code_points_with_decomposition_mapping,
|
||||
append_code_point_mappings("decomposition"sv, "CodePointDecompositionRaw"sv, unicode_data.code_points_with_decomposition_mapping,
|
||||
[](auto const& data) {
|
||||
return data.decomposition_mapping;
|
||||
});
|
||||
|
@ -1153,17 +1160,20 @@ Optional<StringView> code_point_abbreviation(u32 code_point)
|
|||
return decode_string(mapping->abbreviation);
|
||||
}
|
||||
|
||||
Optional<CodePointDecomposition const&> code_point_decomposition(u32 code_point)
|
||||
Optional<CodePointDecomposition const> code_point_decomposition(u32 code_point)
|
||||
{
|
||||
auto const* mapping = binary_search(s_decomposition_mappings, code_point, nullptr, CodePointComparator<CodePointDecomposition> {});
|
||||
auto const* mapping = binary_search(s_decomposition_mappings, code_point, nullptr, CodePointComparator<CodePointDecompositionRaw> {});
|
||||
if (mapping == nullptr)
|
||||
return {};
|
||||
return *mapping;
|
||||
return CodePointDecomposition { mapping->code_point, mapping->tag, Span<u32 const> { s_decomposition_mappings_data.data() + mapping->decomposition_index, mapping->decomposition_count } };
|
||||
}
|
||||
|
||||
Span<CodePointDecomposition const> code_point_decompositions()
|
||||
Optional<CodePointDecomposition const> code_point_decomposition_by_index(size_t index)
|
||||
{
|
||||
return s_decomposition_mappings;
|
||||
if (index >= s_decomposition_mappings.size())
|
||||
return {};
|
||||
auto const& mapping = s_decomposition_mappings[index];
|
||||
return CodePointDecomposition { mapping.code_point, mapping.tag, Span<u32 const> { s_decomposition_mappings_data.data() + mapping.decomposition_index, mapping.decomposition_count } };
|
||||
}
|
||||
)~~~");
|
||||
|
||||
|
|
|
@ -14,8 +14,8 @@
|
|||
|
||||
namespace Unicode {
|
||||
|
||||
Optional<CodePointDecomposition const&> __attribute__((weak)) code_point_decomposition(u32) { return {}; }
|
||||
Span<CodePointDecomposition const> __attribute__((weak)) code_point_decompositions() { return {}; }
|
||||
Optional<CodePointDecomposition const> __attribute__((weak)) code_point_decomposition(u32) { return {}; }
|
||||
Optional<CodePointDecomposition const> __attribute__((weak)) code_point_decomposition_by_index(size_t) { return {}; }
|
||||
|
||||
NormalizationForm normalization_form_from_string(StringView form)
|
||||
{
|
||||
|
@ -122,7 +122,11 @@ static u32 combine_code_points(u32 a, u32 b)
|
|||
{
|
||||
Array<u32, 2> const points { a, b };
|
||||
// FIXME: Do something better than linear search to find reverse mappings.
|
||||
for (auto const& mapping : Unicode::code_point_decompositions()) {
|
||||
for (size_t index = 0;; ++index) {
|
||||
auto mapping_maybe = Unicode::code_point_decomposition_by_index(index);
|
||||
if (!mapping_maybe.has_value())
|
||||
break;
|
||||
auto& mapping = mapping_maybe.value();
|
||||
if (mapping.tag == CompatibilityFormattingTag::Canonical && mapping.decomposition == points) {
|
||||
if (code_point_has_property(mapping.code_point, Property::Full_Composition_Exclusion))
|
||||
continue;
|
||||
|
|
|
@ -15,8 +15,8 @@
|
|||
|
||||
namespace Unicode {
|
||||
|
||||
Optional<CodePointDecomposition const&> code_point_decomposition(u32 code_point);
|
||||
Span<CodePointDecomposition const> code_point_decompositions();
|
||||
Optional<CodePointDecomposition const> code_point_decomposition(u32 code_point);
|
||||
Optional<CodePointDecomposition const> code_point_decomposition_by_index(size_t index);
|
||||
|
||||
enum class NormalizationForm {
|
||||
NFD,
|
||||
|
|
Loading…
Reference in a new issue