LibUnicode: Produce each code point's general category
This will be needed for the Unicode Standard's Default Case Algorithm. Generate the field as an enumeration rather than a string for easier comparison.
This commit is contained in:
parent
32ea461385
commit
5b110034dd
Notes:
sideshowbarker
2024-07-18 08:01:35 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/5b110034ddc Pull-request: https://github.com/SerenityOS/serenity/pull/9043 Reviewed-by: https://github.com/linusg
1 changed file with 20 additions and 2 deletions
|
@ -70,10 +70,12 @@ struct UnicodeData {
|
||||||
|
|
||||||
Vector<CodePointData> code_point_data;
|
Vector<CodePointData> code_point_data;
|
||||||
Vector<CodePointRange> code_point_ranges;
|
Vector<CodePointRange> code_point_ranges;
|
||||||
|
Vector<String> general_categories;
|
||||||
u32 last_contiguous_code_point { 0 };
|
u32 last_contiguous_code_point { 0 };
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr auto s_desired_fields = Array {
|
static constexpr auto s_desired_fields = Array {
|
||||||
|
"general_category"sv,
|
||||||
"simple_uppercase_mapping"sv,
|
"simple_uppercase_mapping"sv,
|
||||||
"simple_lowercase_mapping"sv,
|
"simple_lowercase_mapping"sv,
|
||||||
};
|
};
|
||||||
|
@ -202,10 +204,14 @@ static void parse_unicode_data(Core::File& file, UnicodeData& unicode_data)
|
||||||
|
|
||||||
unicode_data.largest_special_casing_size = max(unicode_data.largest_special_casing_size, data.special_casing_indices.size());
|
unicode_data.largest_special_casing_size = max(unicode_data.largest_special_casing_size, data.special_casing_indices.size());
|
||||||
|
|
||||||
|
if (!unicode_data.general_categories.contains_slow(data.general_category))
|
||||||
|
unicode_data.general_categories.append(data.general_category);
|
||||||
|
|
||||||
previous_code_point = data.code_point;
|
previous_code_point = data.code_point;
|
||||||
unicode_data.code_point_data.append(move(data));
|
unicode_data.code_point_data.append(move(data));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
quick_sort(unicode_data.general_categories);
|
||||||
unicode_data.last_contiguous_code_point = *last_contiguous_code_point;
|
unicode_data.last_contiguous_code_point = *last_contiguous_code_point;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -248,6 +254,18 @@ enum class Condition {
|
||||||
generator.append(R"~~~(
|
generator.append(R"~~~(
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// https://www.unicode.org/reports/tr44/#General_Category_Values
|
||||||
|
enum class GeneralCategory {)~~~");
|
||||||
|
|
||||||
|
for (auto const& general_category : unicode_data.general_categories) {
|
||||||
|
generator.set("general_category", general_category);
|
||||||
|
generator.append(R"~~~(
|
||||||
|
@general_category@,)~~~");
|
||||||
|
}
|
||||||
|
|
||||||
|
generator.append(R"~~~(
|
||||||
|
};
|
||||||
|
|
||||||
struct SpecialCasing {
|
struct SpecialCasing {
|
||||||
u32 code_point { 0 };
|
u32 code_point { 0 };
|
||||||
|
|
||||||
|
@ -279,7 +297,7 @@ struct UnicodeData {
|
||||||
|
|
||||||
// Note: For compile-time performance, only primitive types are used.
|
// Note: For compile-time performance, only primitive types are used.
|
||||||
append_field("char const*"sv, "name"sv);
|
append_field("char const*"sv, "name"sv);
|
||||||
append_field("char const*"sv, "general_category"sv);
|
append_field("GeneralCategory"sv, "general_category"sv);
|
||||||
append_field("u8"sv, "canonical_combining_class"sv);
|
append_field("u8"sv, "canonical_combining_class"sv);
|
||||||
append_field("char const*"sv, "bidi_class"sv);
|
append_field("char const*"sv, "bidi_class"sv);
|
||||||
append_field("char const*"sv, "decomposition_type"sv);
|
append_field("char const*"sv, "decomposition_type"sv);
|
||||||
|
@ -381,7 +399,7 @@ static constexpr Array<UnicodeData, @code_point_data_size@> s_unicode_data { {)~
|
||||||
{ @code_point@)~~~");
|
{ @code_point@)~~~");
|
||||||
|
|
||||||
append_field("name", String::formatted("\"{}\"", data.name));
|
append_field("name", String::formatted("\"{}\"", data.name));
|
||||||
append_field("general_category", String::formatted("\"{}\"", data.general_category));
|
append_field("general_category", String::formatted("GeneralCategory::{}", data.general_category));
|
||||||
append_field("canonical_combining_class", String::number(data.canonical_combining_class));
|
append_field("canonical_combining_class", String::number(data.canonical_combining_class));
|
||||||
append_field("bidi_class", String::formatted("\"{}\"", data.bidi_class));
|
append_field("bidi_class", String::formatted("\"{}\"", data.bidi_class));
|
||||||
append_field("decomposition_type", String::formatted("\"{}\"", data.decomposition_type));
|
append_field("decomposition_type", String::formatted("\"{}\"", data.decomposition_type));
|
||||||
|
|
Loading…
Add table
Reference in a new issue