فهرست منبع

LibUnicode: Remove completely unused code point name & block name data

These were used for e.g. the Character Map on Serenity, but are not used
at all for Ladybird.
Timothy Flynn 1 سال پیش
والد
کامیت
1feef17bf7

+ 1 - 9
Meta/CMake/unicode_data.cmake

@@ -38,18 +38,12 @@ set(PROP_ALIAS_PATH "${UCD_PATH}/${PROP_ALIAS_SOURCE}")
 set(PROP_VALUE_ALIAS_SOURCE "PropertyValueAliases.txt")
 set(PROP_VALUE_ALIAS_PATH "${UCD_PATH}/${PROP_VALUE_ALIAS_SOURCE}")
 
-set(NAME_ALIAS_SOURCE "NameAliases.txt")
-set(NAME_ALIAS_PATH "${UCD_PATH}/${NAME_ALIAS_SOURCE}")
-
 set(SCRIPTS_SOURCE "Scripts.txt")
 set(SCRIPTS_PATH "${UCD_PATH}/${SCRIPTS_SOURCE}")
 
 set(SCRIPT_EXTENSIONS_SOURCE "ScriptExtensions.txt")
 set(SCRIPT_EXTENSIONS_PATH "${UCD_PATH}/${SCRIPT_EXTENSIONS_SOURCE}")
 
-set(BLOCKS_SOURCE "Blocks.txt")
-set(BLOCKS_PATH "${UCD_PATH}/${BLOCKS_SOURCE}")
-
 set(EMOJI_DATA_SOURCE "emoji/emoji-data.txt")
 set(EMOJI_DATA_PATH "${UCD_PATH}/${EMOJI_DATA_SOURCE}")
 
@@ -90,10 +84,8 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
         extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_BINARY_PROP_SOURCE}" "${DERIVED_BINARY_PROP_PATH}")
         extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_ALIAS_SOURCE}" "${PROP_ALIAS_PATH}")
         extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_VALUE_ALIAS_SOURCE}" "${PROP_VALUE_ALIAS_PATH}")
-        extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${NAME_ALIAS_SOURCE}" "${NAME_ALIAS_PATH}")
         extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SCRIPTS_SOURCE}" "${SCRIPTS_PATH}")
         extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SCRIPT_EXTENSIONS_SOURCE}" "${SCRIPT_EXTENSIONS_PATH}")
-        extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${BLOCKS_SOURCE}" "${BLOCKS_PATH}")
         extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${EMOJI_DATA_SOURCE}" "${EMOJI_DATA_PATH}")
         extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${NORM_PROPS_SOURCE}" "${NORM_PROPS_PATH}")
         extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${GRAPHEME_BREAK_PROP_SOURCE}" "${GRAPHEME_BREAK_PROP_PATH}")
@@ -129,7 +121,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
         "${UCD_VERSION_FILE}"
         "${UNICODE_DATA_HEADER}"
         "${UNICODE_DATA_IMPLEMENTATION}"
-        arguments -u "${UNICODE_DATA_PATH}" -s "${SPECIAL_CASING_PATH}" -o "${CASE_FOLDING_PATH}" -g "${DERIVED_GENERAL_CATEGORY_PATH}" -p "${PROP_LIST_PATH}" -d "${DERIVED_CORE_PROP_PATH}" -b "${DERIVED_BINARY_PROP_PATH}" -a "${PROP_ALIAS_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}" -k "${BLOCKS_PATH}" -e "${EMOJI_DATA_PATH}" -m "${NAME_ALIAS_PATH}" -n "${NORM_PROPS_PATH}" -f "${GRAPHEME_BREAK_PROP_PATH}" -w "${WORD_BREAK_PROP_PATH}" -i "${SENTENCE_BREAK_PROP_PATH}"
+        arguments -u "${UNICODE_DATA_PATH}" -s "${SPECIAL_CASING_PATH}" -o "${CASE_FOLDING_PATH}" -g "${DERIVED_GENERAL_CATEGORY_PATH}" -p "${PROP_LIST_PATH}" -d "${DERIVED_CORE_PROP_PATH}" -b "${DERIVED_BINARY_PROP_PATH}" -a "${PROP_ALIAS_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}" -e "${EMOJI_DATA_PATH}" -n "${NORM_PROPS_PATH}" -f "${GRAPHEME_BREAK_PROP_PATH}" -w "${WORD_BREAK_PROP_PATH}" -i "${SENTENCE_BREAK_PROP_PATH}"
     )
     invoke_generator(
         "EmojiData"

+ 0 - 263
Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp

@@ -65,11 +65,6 @@ struct Normalization {
 
 using NormalizationProps = HashMap<ByteString, Vector<Normalization>>;
 
-struct CodePointName {
-    Unicode::CodePointRange code_point_range;
-    size_t name { 0 };
-};
-
 struct CasingTable {
     bool operator==(CasingTable const& other) const
     {
@@ -93,7 +88,6 @@ struct CasingTable {
 struct CodePointData {
     u32 code_point { 0 };
     ByteString name;
-    Optional<size_t> abbreviation;
     ByteString bidi_class;
     Optional<CodePointDecomposition> decomposition_mapping;
     Optional<i8> numeric_value_decimal;
@@ -105,11 +99,6 @@ struct CodePointData {
     CasingTable casing;
 };
 
-struct BlockName {
-    Unicode::CodePointRange code_point_range;
-    size_t name { 0 };
-};
-
 using PropertyTable = Vector<bool>;
 
 static constexpr auto CODE_POINT_TABLES_MSB_COUNT = 16u;
@@ -136,8 +125,6 @@ struct CodePointComposition {
 };
 
 struct UnicodeData {
-    UniqueStringStorage unique_strings;
-
     u32 code_points_with_decomposition_mapping { 0 };
     Vector<u32> decomposition_mappings;
     HashMap<u32, Vector<CodePointComposition>> composition_mappings;
@@ -154,10 +141,6 @@ struct UnicodeData {
 
     Vector<CodePointData> code_point_data;
 
-    HashMap<u32, size_t> code_point_abbreviations;
-    HashMap<u32, size_t> code_point_display_name_aliases;
-    Vector<CodePointName> code_point_display_names;
-
     // https://www.unicode.org/reports/tr44/#General_Category_Values
     PropList general_categories;
     Vector<Alias> general_category_aliases;
@@ -178,8 +161,6 @@ struct UnicodeData {
     Vector<Alias> script_aliases;
     PropList script_extensions;
 
-    Vector<BlockName> block_display_names;
-
     // FIXME: We are not yet doing anything with this data. It will be needed for String.prototype.normalize.
     NormalizationProps normalization_props;
 
@@ -430,37 +411,6 @@ static ErrorOr<void> parse_alias_list(Core::InputBufferedFile& file, PropList co
     return {};
 }
 
-static ErrorOr<void> parse_name_aliases(Core::InputBufferedFile& file, UnicodeData& unicode_data)
-{
-    Array<u8, 1024> buffer;
-
-    while (TRY(file.can_read_line())) {
-        auto line = TRY(file.read_line(buffer));
-
-        if (line.is_empty() || line.starts_with('#'))
-            continue;
-
-        auto segments = line.split_view(';', SplitBehavior::KeepEmpty);
-        VERIFY(segments.size() == 3);
-
-        auto code_point = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[0].trim_whitespace());
-        auto alias = segments[1].trim_whitespace();
-        auto reason = segments[2].trim_whitespace();
-
-        if (reason == "abbreviation"sv) {
-            auto index = unicode_data.unique_strings.ensure(alias);
-            unicode_data.code_point_abbreviations.set(*code_point, index);
-        } else if (reason.is_one_of("correction"sv, "control"sv)) {
-            if (!unicode_data.code_point_display_name_aliases.contains(*code_point)) {
-                auto index = unicode_data.unique_strings.ensure(alias);
-                unicode_data.code_point_display_name_aliases.set(*code_point, index);
-            }
-        }
-    }
-
-    return {};
-}
-
 static ErrorOr<void> parse_value_alias_list(Core::InputBufferedFile& file, StringView desired_category, Vector<ByteString> const& value_list, Vector<Alias>& prop_aliases, bool primary_value_is_first = true, bool sanitize_alias = false)
 {
     TRY(file.seek(0, SeekMode::SetPosition));
@@ -550,68 +500,6 @@ static ErrorOr<void> parse_normalization_props(Core::InputBufferedFile& file, Un
     return {};
 }
 
-static void add_canonical_code_point_name(Unicode::CodePointRange range, StringView name, UnicodeData& unicode_data)
-{
-    // https://www.unicode.org/versions/Unicode15.0.0/ch04.pdf#G142981
-    // FIXME: Implement the NR1 rules for Hangul syllables.
-
-    struct CodePointNameFormat {
-        Unicode::CodePointRange code_point_range;
-        StringView name;
-    };
-
-    // These code point ranges are the NR2 set of name replacements defined by Table 4-8.
-    constexpr Array<CodePointNameFormat, 16> s_ideographic_replacements { {
-        { { 0x3400, 0x4DBF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
-        { { 0x4E00, 0x9FFF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
-        { { 0xF900, 0xFA6D }, "CJK COMPATIBILITY IDEOGRAPH-{:X}"sv },
-        { { 0xFA70, 0xFAD9 }, "CJK COMPATIBILITY IDEOGRAPH-{:X}"sv },
-        { { 0x17000, 0x187F7 }, "TANGUT IDEOGRAPH-{:X}"sv },
-        { { 0x18B00, 0x18CD5 }, "KHITAN SMALL SCRIPT CHARACTER-{:X}"sv },
-        { { 0x18D00, 0x18D08 }, "TANGUT IDEOGRAPH-{:X}"sv },
-        { { 0x1B170, 0x1B2FB }, "NUSHU CHARACTER-{:X}"sv },
-        { { 0x20000, 0x2A6DF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
-        { { 0x2A700, 0x2B739 }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
-        { { 0x2B740, 0x2B81D }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
-        { { 0x2B820, 0x2CEA1 }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
-        { { 0x2CEB0, 0x2EBE0 }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
-        { { 0x2F800, 0x2FA1D }, "CJK COMPATIBILITY IDEOGRAPH-{:X}"sv },
-        { { 0x30000, 0x3134A }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
-        { { 0x31350, 0x323AF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
-    } };
-
-    auto it = find_if(s_ideographic_replacements.begin(), s_ideographic_replacements.end(),
-        [&](auto const& replacement) {
-            return replacement.code_point_range.first == range.first;
-        });
-
-    if (it != s_ideographic_replacements.end()) {
-        auto index = unicode_data.unique_strings.ensure(it->name);
-        unicode_data.code_point_display_names.append({ it->code_point_range, index });
-        return;
-    }
-
-    it = find_if(s_ideographic_replacements.begin(), s_ideographic_replacements.end(),
-        [&](auto const& replacement) {
-            return (replacement.code_point_range.first <= range.first) && (range.first <= replacement.code_point_range.last);
-        });
-
-    if (it != s_ideographic_replacements.end()) {
-        // Drop code points that will have been captured by a range defined by the ideographic replacements.
-        return;
-    }
-
-    if (auto alias = unicode_data.code_point_display_name_aliases.get(range.first); alias.has_value()) {
-        // NR4 states that control code points have a null string as their name. Our implementation
-        // uses the control code's alias as its display name.
-        unicode_data.code_point_display_names.append({ range, *alias });
-        return;
-    }
-
-    auto index = unicode_data.unique_strings.ensure(name);
-    unicode_data.code_point_display_names.append({ range, index });
-}
-
 static Optional<CodePointDecomposition> parse_decomposition_mapping(StringView string, UnicodeData& unicode_data)
 {
     if (string.is_empty())
@@ -660,29 +548,6 @@ static void add_composition_mapping(u32 code_point, CodePointDecomposition& deco
     unicode_data.composition_mappings.ensure(first_code_point).append(CodePointComposition { .second_code_point = second_code_point, .combined_code_point = code_point });
 }
 
-static ErrorOr<void> parse_block_display_names(Core::InputBufferedFile& file, UnicodeData& unicode_data)
-{
-    Array<u8, 1024> buffer;
-    while (TRY(file.can_read_line())) {
-        auto line = TRY(file.read_line(buffer));
-        if (line.is_empty() || line.starts_with('#'))
-            continue;
-
-        auto segments = line.split_view(';', SplitBehavior::KeepEmpty);
-        VERIFY(segments.size() == 2);
-
-        auto code_point_range = parse_code_point_range(segments[0].trim_whitespace());
-        auto display_name = segments[1].trim_whitespace();
-
-        auto index = unicode_data.unique_strings.ensure(display_name);
-        unicode_data.block_display_names.append({ code_point_range, index });
-    }
-
-    TRY(file.seek(0, SeekMode::SetPosition));
-
-    return {};
-}
-
 static ErrorOr<void> parse_unicode_data(Core::InputBufferedFile& file, UnicodeData& unicode_data)
 {
     Optional<u32> code_point_range_start;
@@ -719,9 +584,6 @@ static ErrorOr<void> parse_unicode_data(Core::InputBufferedFile& file, UnicodeDa
         data.casing.simple_lowercase_mapping = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[13]);
         data.casing.simple_titlecase_mapping = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[14]);
 
-        if (auto abbreviation = unicode_data.code_point_abbreviations.get(data.code_point); abbreviation.has_value())
-            data.abbreviation = *abbreviation;
-
         if (!assigned_code_point_range_start.has_value())
             assigned_code_point_range_start = data.code_point;
 
@@ -742,10 +604,8 @@ static ErrorOr<void> parse_unicode_data(Core::InputBufferedFile& file, UnicodeDa
             data.name = data.name.substring(1, data.name.length() - 8);
             code_point_range_start.clear();
 
-            add_canonical_code_point_name(code_point_range, data.name, unicode_data);
             unicode_data.code_point_bidirectional_classes.append({ code_point_range, data.bidi_class });
         } else {
-            add_canonical_code_point_name({ data.code_point, data.code_point }, data.name, unicode_data);
             unicode_data.code_point_bidirectional_classes.append({ { data.code_point, data.code_point }, data.bidi_class });
 
             if ((data.code_point > 0) && (data.code_point - previous_code_point) != 1) {
@@ -905,7 +765,6 @@ static ErrorOr<void> generate_unicode_data_implementation(Core::InputBufferedFil
     StringBuilder builder;
     SourceGenerator generator { builder };
 
-    generator.set("string_index_type"sv, unicode_data.unique_strings.type_that_fits());
     generator.set("special_casing_size", ByteString::number(unicode_data.special_casing.size()));
     generator.set("case_folding_size", ByteString::number(unicode_data.case_folding.size()));
 
@@ -927,8 +786,6 @@ static ErrorOr<void> generate_unicode_data_implementation(Core::InputBufferedFil
 namespace Unicode {
 )~~~");
 
-    unicode_data.unique_strings.generate(generator);
-
     auto append_list_and_size = [&](auto const& list, StringView format) {
         if (list.is_empty()) {
             generator.append(", {}, 0");
@@ -998,11 +855,6 @@ struct CasingTable {
     u32 case_folding_size { 0 };
 };
 
-struct CodePointAbbreviation {
-    u32 code_point { 0 };
-    @string_index_type@ abbreviation { 0 };
-};
-
 template<typename MappingType>
 struct CodePointComparator {
     constexpr int operator()(u32 code_point, MappingType const& mapping)
@@ -1011,30 +863,6 @@ struct CodePointComparator {
     }
 };
 
-struct BlockNameData {
-    CodePointRange code_point_range {};
-    @string_index_type@ display_name { 0 };
-};
-
-struct BlockNameComparator : public CodePointRangeComparator {
-    constexpr int operator()(u32 code_point, BlockNameData const& name)
-    {
-        return CodePointRangeComparator::operator()(code_point, name.code_point_range);
-    }
-};
-
-struct CodePointName {
-    CodePointRange code_point_range {};
-    @string_index_type@ display_name { 0 };
-};
-
-struct CodePointNameComparator : public CodePointRangeComparator {
-    constexpr int operator()(u32 code_point, CodePointName const& name)
-    {
-        return CodePointRangeComparator::operator()(code_point, name.code_point_range);
-    }
-};
-
 struct BidiClassData {
     CodePointRange code_point_range {};
     BidirectionalClass bidi_class {};
@@ -1106,7 +934,6 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
 )~~~");
     };
 
-    append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; });
     append_code_point_mappings("decomposition"sv, "CodePointDecompositionRaw"sv, unicode_data.code_points_with_decomposition_mapping, [](auto const& data) { return data.decomposition_mapping; });
 
     size_t composition_mappings_size = 0;
@@ -1254,39 +1081,6 @@ static constexpr Array<@type@, @size@> @name@ { {
     TRY(append_code_point_tables("s_word_break_properties"sv, unicode_data.word_break_tables, append_property_table));
     TRY(append_code_point_tables("s_sentence_break_properties"sv, unicode_data.sentence_break_tables, append_property_table));
 
-    auto append_code_point_display_names = [&](StringView type, StringView name, auto const& display_names) {
-        constexpr size_t max_values_per_row = 30;
-        size_t values_in_current_row = 0;
-
-        generator.set("type", type);
-        generator.set("name", name);
-        generator.set("size", ByteString::number(display_names.size()));
-
-        generator.append(R"~~~(
-static constexpr Array<@type@, @size@> @name@ { {
-    )~~~");
-        for (auto const& display_name : display_names) {
-            if (values_in_current_row++ > 0)
-                generator.append(", ");
-
-            generator.set("first", ByteString::formatted("{:#x}", display_name.code_point_range.first));
-            generator.set("last", ByteString::formatted("{:#x}", display_name.code_point_range.last));
-            generator.set("name", ByteString::number(display_name.name));
-            generator.append("{ { @first@, @last@ }, @name@ }");
-
-            if (values_in_current_row == max_values_per_row) {
-                values_in_current_row = 0;
-                generator.append(",\n    ");
-            }
-        }
-        generator.append(R"~~~(
-} };
-)~~~");
-    };
-
-    append_code_point_display_names("BlockNameData"sv, "s_block_display_names"sv, unicode_data.block_display_names);
-    append_code_point_display_names("CodePointName"sv, "s_code_point_display_names"sv, unicode_data.code_point_display_names);
-
     {
         constexpr size_t max_bidi_classes_per_row = 20;
         size_t bidi_classes_in_current_row = 0;
@@ -1315,44 +1109,6 @@ static constexpr Array<BidiClassData, @size@> s_bidirectional_classes { {
     }
 
     generator.append(R"~~~(
-Optional<StringView> code_point_block_display_name(u32 code_point)
-{
-    if (auto const* entry = binary_search(s_block_display_names, code_point, nullptr, BlockNameComparator {}))
-        return decode_string(entry->display_name);
-
-    return {};
-}
-
-ReadonlySpan<BlockName> block_display_names()
-{
-    static auto display_names = []() {
-        Array<BlockName, s_block_display_names.size()> display_names;
-
-        for (size_t i = 0; i < s_block_display_names.size(); ++i) {
-            auto const& display_name = s_block_display_names[i];
-            display_names[i] = { display_name.code_point_range, decode_string(display_name.display_name) };
-        }
-
-        return display_names;
-    }();
-
-    return display_names.span();
-}
-
-Optional<ByteString> code_point_display_name(u32 code_point)
-{
-    if (auto const* entry = binary_search(s_code_point_display_names, code_point, nullptr, CodePointNameComparator {})) {
-        auto display_name = decode_string(entry->display_name);
-
-        if (display_name.ends_with("{:X}"sv))
-            return ByteString::formatted(display_name, code_point);
-
-        return display_name;
-    }
-
-    return {};
-}
-
 static CasingTable const& casing_table_for_code_point(u32 code_point)
 {
     auto stage1_index = code_point >> @CODE_POINT_TABLES_LSB_COUNT@;
@@ -1411,17 +1167,6 @@ ReadonlySpan<CaseFolding> case_folding_mapping(u32 code_point)
     return s_case_folding.span().slice(casing_table.case_folding_start_index, casing_table.case_folding_size);
 }
 
-Optional<StringView> code_point_abbreviation(u32 code_point)
-{
-    auto const* mapping = binary_search(s_abbreviation_mappings, code_point, nullptr, CodePointComparator<CodePointAbbreviation> {});
-    if (mapping == nullptr)
-        return {};
-    if (mapping->abbreviation == 0)
-        return {};
-
-    return decode_string(mapping->abbreviation);
-}
-
 Optional<CodePointDecomposition const> code_point_decomposition(u32 code_point)
 {
     auto const* mapping = binary_search(s_decomposition_mappings, code_point, nullptr, CodePointComparator<CodePointDecompositionRaw> {});
@@ -1842,10 +1587,8 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
     StringView derived_binary_prop_path;
     StringView prop_alias_path;
     StringView prop_value_alias_path;
-    StringView name_alias_path;
     StringView scripts_path;
     StringView script_extensions_path;
-    StringView blocks_path;
     StringView emoji_data_path;
     StringView normalization_path;
     StringView grapheme_break_path;
@@ -1864,10 +1607,8 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
     args_parser.add_option(derived_binary_prop_path, "Path to DerivedBinaryProperties.txt file", "derived-binary-prop-path", 'b', "derived-binary-prop-path");
     args_parser.add_option(prop_alias_path, "Path to PropertyAliases.txt file", "prop-alias-path", 'a', "prop-alias-path");
     args_parser.add_option(prop_value_alias_path, "Path to PropertyValueAliases.txt file", "prop-value-alias-path", 'v', "prop-value-alias-path");
-    args_parser.add_option(name_alias_path, "Path to NameAliases.txt file", "name-alias-path", 'm', "name-alias-path");
     args_parser.add_option(scripts_path, "Path to Scripts.txt file", "scripts-path", 'r', "scripts-path");
     args_parser.add_option(script_extensions_path, "Path to ScriptExtensions.txt file", "script-extensions-path", 'x', "script-extensions-path");
-    args_parser.add_option(blocks_path, "Path to Blocks.txt file", "blocks-path", 'k', "blocks-path");
     args_parser.add_option(emoji_data_path, "Path to emoji-data.txt file", "emoji-data-path", 'e', "emoji-data-path");
     args_parser.add_option(normalization_path, "Path to DerivedNormalizationProps.txt file", "normalization-path", 'n', "normalization-path");
     args_parser.add_option(grapheme_break_path, "Path to GraphemeBreakProperty.txt file", "grapheme-break-path", 'f', "grapheme-break-path");
@@ -1886,10 +1627,8 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
     auto derived_binary_prop_file = TRY(open_file(derived_binary_prop_path, Core::File::OpenMode::Read));
     auto prop_alias_file = TRY(open_file(prop_alias_path, Core::File::OpenMode::Read));
     auto prop_value_alias_file = TRY(open_file(prop_value_alias_path, Core::File::OpenMode::Read));
-    auto name_alias_file = TRY(open_file(name_alias_path, Core::File::OpenMode::Read));
     auto scripts_file = TRY(open_file(scripts_path, Core::File::OpenMode::Read));
     auto script_extensions_file = TRY(open_file(script_extensions_path, Core::File::OpenMode::Read));
-    auto blocks_file = TRY(open_file(blocks_path, Core::File::OpenMode::Read));
     auto emoji_data_file = TRY(open_file(emoji_data_path, Core::File::OpenMode::Read));
     auto normalization_file = TRY(open_file(normalization_path, Core::File::OpenMode::Read));
     auto grapheme_break_file = TRY(open_file(grapheme_break_path, Core::File::OpenMode::Read));
@@ -1908,8 +1647,6 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
     TRY(parse_alias_list(*prop_alias_file, unicode_data.prop_list, unicode_data.prop_aliases));
     TRY(parse_prop_list(*scripts_file, unicode_data.script_list));
     TRY(parse_prop_list(*script_extensions_file, unicode_data.script_extensions, true));
-    TRY(parse_block_display_names(*blocks_file, unicode_data));
-    TRY(parse_name_aliases(*name_alias_file, unicode_data));
     TRY(parse_prop_list(*grapheme_break_file, unicode_data.grapheme_break_props));
     TRY(parse_prop_list(*word_break_file, unicode_data.word_break_props));
     TRY(parse_prop_list(*sentence_break_file, unicode_data.sentence_break_props));

+ 0 - 41
Tests/LibUnicode/TestUnicodeCharacterTypes.cpp

@@ -330,15 +330,6 @@ TEST_CASE(script)
     }
 }
 
-TEST_CASE(block)
-{
-    for (u32 code_point = 0x0000; code_point <= 0x007F; ++code_point)
-        EXPECT_EQ("Basic Latin"sv, Unicode::code_point_block_display_name(code_point).value());
-
-    for (u32 code_point = 0x0370; code_point <= 0x03FF; ++code_point)
-        EXPECT_EQ("Greek and Coptic"sv, Unicode::code_point_block_display_name(code_point).value());
-}
-
 TEST_CASE(script_extension)
 {
     auto script = [](StringView name) {
@@ -390,38 +381,6 @@ TEST_CASE(script_extension)
     EXPECT(Unicode::code_point_has_script_extension(0x101fd, script_inherited));
 }
 
-TEST_CASE(code_point_display_name)
-{
-    auto code_point_display_name = [](u32 code_point) {
-        auto name = Unicode::code_point_display_name(code_point);
-        VERIFY(name.has_value());
-        return name.release_value();
-    };
-
-    // Control code points.
-    EXPECT_EQ(code_point_display_name(0), "NULL"sv);
-    EXPECT_EQ(code_point_display_name(1), "START OF HEADING"sv);
-    EXPECT_EQ(code_point_display_name(0xa), "LINE FEED"sv);
-
-    // Ideographic code points (which already appeared in a range in UnicodeData.txt).
-    EXPECT_EQ(code_point_display_name(0x3400), "CJK UNIFIED IDEOGRAPH-3400"sv);
-    EXPECT_EQ(code_point_display_name(0x3401), "CJK UNIFIED IDEOGRAPH-3401"sv);
-    EXPECT_EQ(code_point_display_name(0x3402), "CJK UNIFIED IDEOGRAPH-3402"sv);
-    EXPECT_EQ(code_point_display_name(0x4dbf), "CJK UNIFIED IDEOGRAPH-4DBF"sv);
-
-    EXPECT_EQ(code_point_display_name(0x20000), "CJK UNIFIED IDEOGRAPH-20000"sv);
-    EXPECT_EQ(code_point_display_name(0x20001), "CJK UNIFIED IDEOGRAPH-20001"sv);
-    EXPECT_EQ(code_point_display_name(0x20002), "CJK UNIFIED IDEOGRAPH-20002"sv);
-    EXPECT_EQ(code_point_display_name(0x2a6df), "CJK UNIFIED IDEOGRAPH-2A6DF"sv);
-    EXPECT(!Unicode::code_point_display_name(0x2a6e0).has_value());
-
-    // Ideographic code points (which appeared individually in UnicodeData.txt and were coalesced into a range).
-    EXPECT_EQ(code_point_display_name(0x2f800), "CJK COMPATIBILITY IDEOGRAPH-2F800"sv);
-    EXPECT_EQ(code_point_display_name(0x2f801), "CJK COMPATIBILITY IDEOGRAPH-2F801"sv);
-    EXPECT_EQ(code_point_display_name(0x2f802), "CJK COMPATIBILITY IDEOGRAPH-2F802"sv);
-    EXPECT_EQ(code_point_display_name(0x2fa1d), "CJK COMPATIBILITY IDEOGRAPH-2FA1D"sv);
-}
-
 TEST_CASE(code_point_bidirectional_character_type)
 {
     auto code_point_bidi_class = [](u32 code_point) {

+ 0 - 4
Userland/Libraries/LibUnicode/CharacterTypes.cpp

@@ -21,11 +21,7 @@
 
 namespace Unicode {
 
-Optional<ByteString> __attribute__((weak)) code_point_display_name(u32) { return {}; }
-Optional<StringView> __attribute__((weak)) code_point_block_display_name(u32) { return {}; }
-Optional<StringView> __attribute__((weak)) code_point_abbreviation(u32) { return {}; }
 u32 __attribute__((weak)) canonical_combining_class(u32) { return {}; }
-ReadonlySpan<BlockName> __attribute__((weak)) block_display_names() { return {}; }
 
 u32 __attribute__((weak)) to_unicode_lowercase(u32 code_point)
 {

+ 0 - 11
Userland/Libraries/LibUnicode/CharacterTypes.h

@@ -29,17 +29,6 @@ struct CodePointRangeComparator {
     }
 };
 
-struct BlockName {
-    CodePointRange code_point_range {};
-    StringView display_name;
-};
-
-Optional<ByteString> code_point_display_name(u32 code_point);
-Optional<StringView> code_point_block_display_name(u32 code_point);
-Optional<StringView> code_point_abbreviation(u32 code_point);
-
-ReadonlySpan<BlockName> block_display_names();
-
 u32 canonical_combining_class(u32 code_point);
 
 // Note: The single code point case conversions only perform simple case folding.