há 1 ano atrás · ab56b8c8dc
--- a/Meta/CMake/unicode_data.cmake
+++ b/Meta/CMake/unicode_data.cmake
@@ -43,15 +43,6 @@ set(EMOJI_DATA_PATH "${UCD_PATH}/${EMOJI_DATA_SOURCE}")
 
				 set(NORM_PROPS_SOURCE "DerivedNormalizationProps.txt")
			
 
				 set(NORM_PROPS_PATH "${UCD_PATH}/${NORM_PROPS_SOURCE}")
			
 
				 
			
 
				-set(GRAPHEME_BREAK_PROP_SOURCE "auxiliary/GraphemeBreakProperty.txt")
			
 
				-set(GRAPHEME_BREAK_PROP_PATH "${UCD_PATH}/${GRAPHEME_BREAK_PROP_SOURCE}")
			
 
				-
			
 
				-set(WORD_BREAK_PROP_SOURCE "auxiliary/WordBreakProperty.txt")
			
 
				-set(WORD_BREAK_PROP_PATH "${UCD_PATH}/${WORD_BREAK_PROP_SOURCE}")
			
 
				-
			
 
				-set(SENTENCE_BREAK_PROP_SOURCE "auxiliary/SentenceBreakProperty.txt")
			
 
				-set(SENTENCE_BREAK_PROP_PATH "${UCD_PATH}/${SENTENCE_BREAK_PROP_SOURCE}")
			
 
				-
			
 
				 string(REGEX REPLACE "([0-9]+\\.[0-9]+)\\.[0-9]+" "\\1" EMOJI_VERSION "${UCD_VERSION}")
			
 
				 set(EMOJI_TEST_URL "https://www.unicode.org/Public/emoji/${EMOJI_VERSION}/emoji-test.txt")
			
 
				 set(EMOJI_TEST_PATH "${UCD_PATH}/emoji-test.txt")
			
@@ -76,9 +67,6 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
 
				         extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SCRIPT_EXTENSIONS_SOURCE}" "${SCRIPT_EXTENSIONS_PATH}")
			
 
				         extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${EMOJI_DATA_SOURCE}" "${EMOJI_DATA_PATH}")
			
 
				         extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${NORM_PROPS_SOURCE}" "${NORM_PROPS_PATH}")
			
 
				-        extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${GRAPHEME_BREAK_PROP_SOURCE}" "${GRAPHEME_BREAK_PROP_PATH}")
			
 
				-        extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${WORD_BREAK_PROP_SOURCE}" "${WORD_BREAK_PROP_PATH}")
			
 
				-        extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SENTENCE_BREAK_PROP_SOURCE}" "${SENTENCE_BREAK_PROP_PATH}")
			
 
				 
			
 
				         download_file("${EMOJI_TEST_URL}" "${EMOJI_TEST_PATH}" SHA256 "${EMOJI_SHA256}")
			
 
				     else()
			
@@ -103,7 +91,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
 
				         "${UCD_VERSION_FILE}"
			
 
				         "${UNICODE_DATA_HEADER}"
			
 
				         "${UNICODE_DATA_IMPLEMENTATION}"
			
 
				-        arguments -u "${UNICODE_DATA_PATH}" -g "${DERIVED_GENERAL_CATEGORY_PATH}" -p "${PROP_LIST_PATH}" -d "${DERIVED_CORE_PROP_PATH}" -b "${DERIVED_BINARY_PROP_PATH}" -a "${PROP_ALIAS_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}" -e "${EMOJI_DATA_PATH}" -n "${NORM_PROPS_PATH}" -f "${GRAPHEME_BREAK_PROP_PATH}" -w "${WORD_BREAK_PROP_PATH}" -i "${SENTENCE_BREAK_PROP_PATH}"
			
 
				+        arguments -u "${UNICODE_DATA_PATH}" -g "${DERIVED_GENERAL_CATEGORY_PATH}" -p "${PROP_LIST_PATH}" -d "${DERIVED_CORE_PROP_PATH}" -b "${DERIVED_BINARY_PROP_PATH}" -a "${PROP_ALIAS_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}" -e "${EMOJI_DATA_PATH}" -n "${NORM_PROPS_PATH}"
			
 
				     )
			
 
				     invoke_generator(
			
 
				         "EmojiData"
			
--- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp
@@ -98,17 +98,10 @@ struct UnicodeData {
 
				     // FIXME: We are not yet doing anything with this data. It will be needed for String.prototype.normalize.
			
 
				     NormalizationProps normalization_props;
			
 
				 
			
 
				-    PropList grapheme_break_props;
			
 
				-    PropList word_break_props;
			
 
				-    PropList sentence_break_props;
			
 
				-
			
 
				     CodePointTables<PropertyTable> general_category_tables;
			
 
				     CodePointTables<PropertyTable> property_tables;
			
 
				     CodePointTables<PropertyTable> script_tables;
			
 
				     CodePointTables<PropertyTable> script_extension_tables;
			
 
				-    CodePointTables<PropertyTable> grapheme_break_tables;
			
 
				-    CodePointTables<PropertyTable> word_break_tables;
			
 
				-    CodePointTables<PropertyTable> sentence_break_tables;
			
 
				 
			
 
				     HashTable<ByteString> bidirectional_classes;
			
 
				     Vector<CodePointBidiClass> code_point_bidirectional_classes;
			
@@ -439,9 +432,6 @@ namespace Unicode {
 
				     generate_enum("GeneralCategory"sv, {}, unicode_data.general_categories.keys(), unicode_data.general_category_aliases);
			
 
				     generate_enum("Property"sv, {}, unicode_data.prop_list.keys(), unicode_data.prop_aliases);
			
 
				     generate_enum("Script"sv, {}, unicode_data.script_list.keys(), unicode_data.script_aliases);
			
 
				-    generate_enum("GraphemeBreakProperty"sv, {}, unicode_data.grapheme_break_props.keys());
			
 
				-    generate_enum("WordBreakProperty"sv, {}, unicode_data.word_break_props.keys());
			
 
				-    generate_enum("SentenceBreakProperty"sv, {}, unicode_data.sentence_break_props.keys());
			
 
				     generate_enum("BidirectionalClass"sv, {}, unicode_data.bidirectional_classes.values());
			
 
				 
			
 
				     generator.append(R"~~~(
			
@@ -560,9 +550,6 @@ static constexpr Array<@type@, @size@> @name@ { {
 
				     TRY(append_code_point_tables("s_properties"sv, unicode_data.property_tables, append_property_table));
			
 
				     TRY(append_code_point_tables("s_scripts"sv, unicode_data.script_tables, append_property_table));
			
 
				     TRY(append_code_point_tables("s_script_extensions"sv, unicode_data.script_extension_tables, append_property_table));
			
 
				-    TRY(append_code_point_tables("s_grapheme_break_properties"sv, unicode_data.grapheme_break_tables, append_property_table));
			
 
				-    TRY(append_code_point_tables("s_word_break_properties"sv, unicode_data.word_break_tables, append_property_table));
			
 
				-    TRY(append_code_point_tables("s_sentence_break_properties"sv, unicode_data.sentence_break_tables, append_property_table));
			
 
				 
			
 
				     {
			
 
				         constexpr size_t max_bidi_classes_per_row = 20;
			
@@ -654,10 +641,6 @@ bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@)
 
				     TRY(append_prop_search("Script"sv, "script_extension"sv, "s_script_extensions"sv));
			
 
				     TRY(append_from_string("Script"sv, "script"sv, unicode_data.script_list, unicode_data.script_aliases));
			
 
				 
			
 
				-    TRY(append_prop_search("GraphemeBreakProperty"sv, "grapheme_break_property"sv, "s_grapheme_break_properties"sv));
			
 
				-    TRY(append_prop_search("WordBreakProperty"sv, "word_break_property"sv, "s_word_break_properties"sv));
			
 
				-    TRY(append_prop_search("SentenceBreakProperty"sv, "sentence_break_property"sv, "s_sentence_break_properties"sv));
			
 
				-
			
 
				     TRY(append_from_string("BidirectionalClass"sv, "bidirectional_class"sv, unicode_data.bidirectional_classes, {}));
			
 
				 
			
 
				     generator.append(R"~~~(
			
@@ -921,18 +904,12 @@ static ErrorOr<void> create_code_point_tables(UnicodeData& unicode_data)
 
				     auto property_metadata = TRY(PropertyMetadata::create(unicode_data.prop_list));
			
 
				     auto script_metadata = TRY(PropertyMetadata::create(unicode_data.script_list));
			
 
				     auto script_extension_metadata = TRY(PropertyMetadata::create(unicode_data.script_extensions));
			
 
				-    auto grapheme_break_metadata = TRY(PropertyMetadata::create(unicode_data.grapheme_break_props));
			
 
				-    auto word_break_metadata = TRY(PropertyMetadata::create(unicode_data.word_break_props));
			
 
				-    auto sentence_break_metadata = TRY(PropertyMetadata::create(unicode_data.sentence_break_props));
			
 
				 
			
 
				     for (u32 code_point = 0; code_point <= MAX_CODE_POINT; ++code_point) {
			
 
				         TRY(update_property_tables(code_point, unicode_data.general_category_tables, general_category_metadata));
			
 
				         TRY(update_property_tables(code_point, unicode_data.property_tables, property_metadata));
			
 
				         TRY(update_property_tables(code_point, unicode_data.script_tables, script_metadata));
			
 
				         TRY(update_property_tables(code_point, unicode_data.script_extension_tables, script_extension_metadata));
			
 
				-        TRY(update_property_tables(code_point, unicode_data.grapheme_break_tables, grapheme_break_metadata));
			
 
				-        TRY(update_property_tables(code_point, unicode_data.word_break_tables, word_break_metadata));
			
 
				-        TRY(update_property_tables(code_point, unicode_data.sentence_break_tables, sentence_break_metadata));
			
 
				     }
			
 
				 
			
 
				     return {};
			
@@ -953,9 +930,6 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
 
				     StringView script_extensions_path;
			
 
				     StringView emoji_data_path;
			
 
				     StringView normalization_path;
			
 
				-    StringView grapheme_break_path;
			
 
				-    StringView word_break_path;
			
 
				-    StringView sentence_break_path;
			
 
				 
			
 
				     Core::ArgsParser args_parser;
			
 
				     args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path");
			
@@ -971,9 +945,6 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
 
				     args_parser.add_option(script_extensions_path, "Path to ScriptExtensions.txt file", "script-extensions-path", 'x', "script-extensions-path");
			
 
				     args_parser.add_option(emoji_data_path, "Path to emoji-data.txt file", "emoji-data-path", 'e', "emoji-data-path");
			
 
				     args_parser.add_option(normalization_path, "Path to DerivedNormalizationProps.txt file", "normalization-path", 'n', "normalization-path");
			
 
				-    args_parser.add_option(grapheme_break_path, "Path to GraphemeBreakProperty.txt file", "grapheme-break-path", 'f', "grapheme-break-path");
			
 
				-    args_parser.add_option(word_break_path, "Path to WordBreakProperty.txt file", "word-break-path", 'w', "word-break-path");
			
 
				-    args_parser.add_option(sentence_break_path, "Path to SentenceBreakProperty.txt file", "sentence-break-path", 'i', "sentence-break-path");
			
 
				     args_parser.parse(arguments);
			
 
				 
			
 
				     auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write));
			
@@ -989,9 +960,6 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
 
				     auto script_extensions_file = TRY(open_file(script_extensions_path, Core::File::OpenMode::Read));
			
 
				     auto emoji_data_file = TRY(open_file(emoji_data_path, Core::File::OpenMode::Read));
			
 
				     auto normalization_file = TRY(open_file(normalization_path, Core::File::OpenMode::Read));
			
 
				-    auto grapheme_break_file = TRY(open_file(grapheme_break_path, Core::File::OpenMode::Read));
			
 
				-    auto word_break_file = TRY(open_file(word_break_path, Core::File::OpenMode::Read));
			
 
				-    auto sentence_break_file = TRY(open_file(sentence_break_path, Core::File::OpenMode::Read));
			
 
				 
			
 
				     UnicodeData unicode_data {};
			
 
				     TRY(parse_prop_list(*derived_general_category_file, unicode_data.general_categories));
			
@@ -1003,9 +971,6 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
 
				     TRY(parse_alias_list(*prop_alias_file, unicode_data.prop_list, unicode_data.prop_aliases));
			
 
				     TRY(parse_prop_list(*scripts_file, unicode_data.script_list));
			
 
				     TRY(parse_prop_list(*script_extensions_file, unicode_data.script_extensions, true));
			
 
				-    TRY(parse_prop_list(*grapheme_break_file, unicode_data.grapheme_break_props));
			
 
				-    TRY(parse_prop_list(*word_break_file, unicode_data.word_break_props));
			
 
				-    TRY(parse_prop_list(*sentence_break_file, unicode_data.sentence_break_props));
			
 
				 
			
 
				     populate_general_category_unions(unicode_data.general_categories);
			
 
				     TRY(parse_unicode_data(*unicode_data_file, unicode_data));
			
--- a/Tests/LibUnicode/CMakeLists.txt
+++ b/Tests/LibUnicode/CMakeLists.txt
@@ -1,7 +1,6 @@
 
				 set(TEST_SOURCES
			
 
				     TestEmoji.cpp
			
 
				     TestIDNA.cpp
			
 
				-    TestSegmentation.cpp
			
 
				     TestUnicodeCharacterTypes.cpp
			
 
				     TestUnicodeNormalization.cpp
			
 
				 )
			
--- a/Tests/LibUnicode/TestSegmentation.cpp
+++ b/Tests/LibUnicode/TestSegmentation.cpp
@@ -1,122 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
			
 
				- *
			
 
				- * SPDX-License-Identifier: BSD-2-Clause
			
 
				- */
			
 
				-
			
 
				-#include <AK/Array.h>
			
 
				-#include <AK/StringView.h>
			
 
				-#include <AK/Utf8View.h>
			
 
				-#include <AK/Vector.h>
			
 
				-#include <LibTest/TestCase.h>
			
 
				-#include <LibUnicode/Segmentation.h>
			
 
				-
			
 
				-template<size_t N>
			
 
				-static void test_grapheme_segmentation(StringView string, size_t const (&expected_boundaries)[N])
			
 
				-{
			
 
				-    Vector<size_t> boundaries;
			
 
				-    Utf8View view { string };
			
 
				-
			
 
				-    Unicode::for_each_grapheme_segmentation_boundary(view, [&](auto boundary) {
			
 
				-        boundaries.append(boundary);
			
 
				-        return IterationDecision::Continue;
			
 
				-    });
			
 
				-
			
 
				-    EXPECT_EQ(boundaries, ReadonlySpan<size_t> { expected_boundaries });
			
 
				-}
			
 
				-
			
 
				-TEST_CASE(grapheme_segmentation)
			
 
				-{
			
 
				-    Unicode::for_each_grapheme_segmentation_boundary(Utf8View {}, [&](auto) {
			
 
				-        VERIFY_NOT_REACHED();
			
 
				-        return IterationDecision::Break;
			
 
				-    });
			
 
				-
			
 
				-    test_grapheme_segmentation("a"sv, { 0u, 1u });
			
 
				-    test_grapheme_segmentation("ab"sv, { 0u, 1u, 2u });
			
 
				-    test_grapheme_segmentation("abc"sv, { 0u, 1u, 2u, 3u });
			
 
				-
			
 
				-    test_grapheme_segmentation("a\nb"sv, { 0u, 1u, 2u, 3u });
			
 
				-    test_grapheme_segmentation("a\n\rb"sv, { 0u, 1u, 2u, 3u, 4u });
			
 
				-    test_grapheme_segmentation("a\r\nb"sv, { 0u, 1u, 3u, 4u });
			
 
				-
			
 
				-    test_grapheme_segmentation("aᄀb"sv, { 0u, 1u, 4u, 5u });
			
 
				-    test_grapheme_segmentation("aᄀᄀb"sv, { 0u, 1u, 7u, 8u });
			
 
				-    test_grapheme_segmentation("aᄀᆢb"sv, { 0u, 1u, 7u, 8u });
			
 
				-    test_grapheme_segmentation("aᄀ가b"sv, { 0u, 1u, 7u, 8u });
			
 
				-    test_grapheme_segmentation("aᄀ각b"sv, { 0u, 1u, 7u, 8u });
			
 
				-
			
 
				-    test_grapheme_segmentation("a😀b"sv, { 0u, 1u, 5u, 6u });
			
 
				-    test_grapheme_segmentation("a👨‍👩‍👧‍👦b"sv, { 0u, 1u, 26u, 27u });
			
 
				-    test_grapheme_segmentation("a👩🏼‍❤️‍👨🏻b"sv, { 0u, 1u, 29u, 30u });
			
 
				-}
			
 
				-
			
 
				-TEST_CASE(grapheme_segmentation_indic_conjunct_break)
			
 
				-{
			
 
				-    test_grapheme_segmentation("\u0915"sv, { 0u, 3u });
			
 
				-    test_grapheme_segmentation("\u0915a"sv, { 0u, 3u, 4u });
			
 
				-    test_grapheme_segmentation("\u0915\u0916"sv, { 0u, 3u, 6u });
			
 
				-
			
 
				-    test_grapheme_segmentation("\u0915\u094D\u0916"sv, { 0u, 9u });
			
 
				-
			
 
				-    test_grapheme_segmentation("\u0915\u09BC\u09CD\u094D\u0916"sv, { 0u, 15u });
			
 
				-    test_grapheme_segmentation("\u0915\u094D\u09BC\u09CD\u0916"sv, { 0u, 15u });
			
 
				-
			
 
				-    test_grapheme_segmentation("\u0915\u09BC\u09CD\u094D\u09BC\u09CD\u0916"sv, { 0u, 21u });
			
 
				-    test_grapheme_segmentation("\u0915\u09BC\u09CD\u09BC\u09CD\u094D\u0916"sv, { 0u, 21u });
			
 
				-    test_grapheme_segmentation("\u0915\u094D\u09BC\u09CD\u09BC\u09CD\u0916"sv, { 0u, 21u });
			
 
				-
			
 
				-    test_grapheme_segmentation("\u0915\u09BC\u09CD\u09BC\u09CD\u094D\u09BC\u09CD\u0916"sv, { 0u, 27u });
			
 
				-    test_grapheme_segmentation("\u0915\u09BC\u09CD\u094D\u09BC\u09CD\u09BC\u09CD\u0916"sv, { 0u, 27u });
			
 
				-
			
 
				-    test_grapheme_segmentation("\u0915\u09BC\u09CD\u09BC\u09CD\u094D\u09BC\u09CD\u09BC\u09CD\u0916"sv, { 0u, 33u });
			
 
				-}
			
 
				-
			
 
				-template<size_t N>
			
 
				-static void test_word_segmentation(StringView string, size_t const (&expected_boundaries)[N])
			
 
				-{
			
 
				-    Vector<size_t> boundaries;
			
 
				-    Utf8View view { string };
			
 
				-
			
 
				-    Unicode::for_each_word_segmentation_boundary(view, [&](auto boundary) {
			
 
				-        boundaries.append(boundary);
			
 
				-        return IterationDecision::Continue;
			
 
				-    });
			
 
				-
			
 
				-    EXPECT_EQ(boundaries, ReadonlySpan<size_t> { expected_boundaries });
			
 
				-}
			
 
				-
			
 
				-TEST_CASE(word_segmentation)
			
 
				-{
			
 
				-    Unicode::for_each_word_segmentation_boundary(Utf8View {}, [&](auto) {
			
 
				-        VERIFY_NOT_REACHED();
			
 
				-        return IterationDecision::Break;
			
 
				-    });
			
 
				-
			
 
				-    test_word_segmentation("a"sv, { 0u, 1u });
			
 
				-    test_word_segmentation("ab"sv, { 0u, 2u });
			
 
				-    test_word_segmentation("abc"sv, { 0u, 3u });
			
 
				-
			
 
				-    test_word_segmentation("ab cd"sv, { 0u, 2u, 3u, 5u });
			
 
				-    test_word_segmentation("ab  cd"sv, { 0u, 2u, 4u, 6u });
			
 
				-    test_word_segmentation("ab\tcd"sv, { 0u, 2u, 3u, 5u });
			
 
				-    test_word_segmentation("ab\ncd"sv, { 0u, 2u, 3u, 5u });
			
 
				-    test_word_segmentation("ab\n\rcd"sv, { 0u, 2u, 3u, 4u, 6u });
			
 
				-    test_word_segmentation("ab\r\ncd"sv, { 0u, 2u, 4u, 6u });
			
 
				-
			
 
				-    test_word_segmentation("a😀b"sv, { 0u, 1u, 5u, 6u });
			
 
				-    test_word_segmentation("a👨‍👩‍👧‍👦b"sv, { 0u, 1u, 26u, 27u });
			
 
				-    test_word_segmentation("a👩🏼‍❤️‍👨🏻b"sv, { 0u, 1u, 29u, 30u });
			
 
				-
			
 
				-    test_word_segmentation("ab 12 cd"sv, { 0u, 2u, 3u, 5u, 6u, 8u });
			
 
				-    test_word_segmentation("ab 1.2 cd"sv, { 0u, 2u, 3u, 6u, 7u, 9u });
			
 
				-    test_word_segmentation("ab 12.34 cd"sv, { 0u, 2u, 3u, 8u, 9u, 11u });
			
 
				-    test_word_segmentation("ab example.com cd"sv, { 0u, 2u, 3u, 14u, 15u, 17u });
			
 
				-
			
 
				-    test_word_segmentation("ab can't cd"sv, { 0u, 2u, 3u, 8u, 9u, 11u });
			
 
				-    test_word_segmentation("ab \"can't\" cd"sv, { 0u, 2u, 3u, 4u, 9u, 10u, 11u, 13u });
			
 
				-
			
 
				-    test_word_segmentation(
			
 
				-        "The quick (“brown”) fox can’t jump 32.3 feet, right?"sv,
			
 
				-        { 0u, 3u, 4u, 9u, 10u, 11u, 14u, 19u, 22u, 23u, 24u, 27u, 28u, 35u, 36u, 40u, 41u, 45u, 46u, 50u, 51u, 52u, 57u, 58u });
			
 
				-}
			
--- a/Userland/Libraries/LibUnicode/CMakeLists.txt
+++ b/Userland/Libraries/LibUnicode/CMakeLists.txt
@@ -6,7 +6,6 @@ set(SOURCES
 
				     Emoji.cpp
			
 
				     IDNA.cpp
			
 
				     Normalize.cpp
			
 
				-    Segmentation.cpp
			
 
				     String.cpp
			
 
				     ${UNICODE_DATA_SOURCES}
			
 
				 )
			
--- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
@@ -88,10 +88,6 @@ Optional<Script> __attribute__((weak)) script_from_string(StringView) { return {
 
				 bool __attribute__((weak)) code_point_has_script(u32, Script) { return {}; }
			
 
				 bool __attribute__((weak)) code_point_has_script_extension(u32, Script) { return {}; }
			
 
				 
			
 
				-bool __attribute__((weak)) code_point_has_grapheme_break_property(u32, GraphemeBreakProperty) { return {}; }
			
 
				-bool __attribute__((weak)) code_point_has_word_break_property(u32, WordBreakProperty) { return {}; }
			
 
				-bool __attribute__((weak)) code_point_has_sentence_break_property(u32, SentenceBreakProperty) { return {}; }
			
 
				-
			
 
				 Optional<BidirectionalClass> __attribute__((weak)) bidirectional_class_from_string(StringView) { return {}; }
			
 
				 Optional<BidirectionalClass> __attribute__((weak)) bidirectional_class(u32) { return {}; }
			
 
				 
			
--- a/Userland/Libraries/LibUnicode/CharacterTypes.h
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.h
@@ -37,10 +37,6 @@ Optional<Script> script_from_string(StringView);
 
				 bool code_point_has_script(u32 code_point, Script script);
			
 
				 bool code_point_has_script_extension(u32 code_point, Script script);
			
 
				 
			
 
				-bool code_point_has_grapheme_break_property(u32 code_point, GraphemeBreakProperty property);
			
 
				-bool code_point_has_word_break_property(u32 code_point, WordBreakProperty property);
			
 
				-bool code_point_has_sentence_break_property(u32 code_point, SentenceBreakProperty property);
			
 
				-
			
 
				 Optional<BidirectionalClass> bidirectional_class_from_string(StringView);
			
 
				 Optional<BidirectionalClass> bidirectional_class(u32 code_point);
			
 
				 
			
--- a/Userland/Libraries/LibUnicode/Forward.h
+++ b/Userland/Libraries/LibUnicode/Forward.h
@@ -13,11 +13,8 @@ namespace Unicode {
 
				 enum class BidirectionalClass : u8;
			
 
				 enum class EmojiGroup : u8;
			
 
				 enum class GeneralCategory : u8;
			
 
				-enum class GraphemeBreakProperty : u8;
			
 
				 enum class Property : u8;
			
 
				 enum class Script : u8;
			
 
				-enum class SentenceBreakProperty : u8;
			
 
				-enum class WordBreakProperty : u8;
			
 
				 
			
 
				 struct CurrencyCode;
			
 
				 struct Emoji;
			
--- a/Userland/Libraries/LibUnicode/Segmentation.cpp
+++ b/Userland/Libraries/LibUnicode/Segmentation.cpp
@@ -1,491 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
			
 
				- * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
			
 
				- *
			
 
				- * SPDX-License-Identifier: BSD-2-Clause
			
 
				- */
			
 
				-
			
 
				-#include <AK/Utf16View.h>
			
 
				-#include <AK/Utf32View.h>
			
 
				-#include <AK/Utf8View.h>
			
 
				-#include <LibUnicode/CharacterTypes.h>
			
 
				-#include <LibUnicode/Segmentation.h>
			
 
				-
			
 
				-#if ENABLE_UNICODE_DATA
			
 
				-#    include <LibUnicode/UnicodeData.h>
			
 
				-#endif
			
 
				-
			
 
				-namespace Unicode {
			
 
				-
			
 
				-template<typename ViewType>
			
 
				-static size_t code_unit_length(ViewType const& view)
			
 
				-{
			
 
				-    if constexpr (IsSame<ViewType, Utf8View>)
			
 
				-        return view.byte_length();
			
 
				-    else if constexpr (IsSame<ViewType, Utf16View>)
			
 
				-        return view.length_in_code_units();
			
 
				-    else if constexpr (IsSame<ViewType, Utf32View>)
			
 
				-        return view.length();
			
 
				-    else
			
 
				-        static_assert(DependentFalse<ViewType>);
			
 
				-}
			
 
				-
			
 
				-template<typename ViewType, typename CodeUnitIterator>
			
 
				-static size_t code_unit_offset_of(ViewType const& view, CodeUnitIterator const& it)
			
 
				-{
			
 
				-    if constexpr (IsSame<ViewType, Utf8View>)
			
 
				-        return view.byte_offset_of(it);
			
 
				-    else if constexpr (IsSame<ViewType, Utf16View>)
			
 
				-        return view.code_unit_offset_of(it);
			
 
				-    else if constexpr (IsSame<ViewType, Utf32View>)
			
 
				-        return view.iterator_offset(it);
			
 
				-    else
			
 
				-        static_assert(DependentFalse<ViewType>);
			
 
				-}
			
 
				-
			
 
				-template<typename ViewType>
			
 
				-static void for_each_grapheme_segmentation_boundary_impl([[maybe_unused]] ViewType const& view, [[maybe_unused]] SegmentationCallback callback)
			
 
				-{
			
 
				-#if ENABLE_UNICODE_DATA
			
 
				-    using GBP = GraphemeBreakProperty;
			
 
				-
			
 
				-    // https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
			
 
				-    if (view.is_empty())
			
 
				-        return;
			
 
				-
			
 
				-    auto has_any_gbp = [](u32 code_point, auto&&... properties) {
			
 
				-        return (code_point_has_grapheme_break_property(code_point, properties) || ...);
			
 
				-    };
			
 
				-
			
 
				-    auto skip_incb_extend_linker_sequence = [&](auto& it) {
			
 
				-        while (true) {
			
 
				-            if (it == view.end() || !code_point_has_property(*it, Property::InCB_Extend))
			
 
				-                return;
			
 
				-
			
 
				-            auto next_it = it;
			
 
				-            ++next_it;
			
 
				-
			
 
				-            if (next_it == view.end() || !code_point_has_property(*next_it, Property::InCB_Linker))
			
 
				-                return;
			
 
				-
			
 
				-            it = next_it;
			
 
				-            ++it;
			
 
				-        }
			
 
				-    };
			
 
				-
			
 
				-    // GB1
			
 
				-    if (callback(0) == IterationDecision::Break)
			
 
				-        return;
			
 
				-
			
 
				-    if (code_unit_length(view) > 1) {
			
 
				-        auto it = view.begin();
			
 
				-        auto code_point = *it;
			
 
				-        u32 next_code_point = 0;
			
 
				-        auto current_ri_chain = 0;
			
 
				-
			
 
				-        for (++it; it != view.end(); ++it, code_point = next_code_point) {
			
 
				-            next_code_point = *it;
			
 
				-
			
 
				-            // GB9c
			
 
				-            if (code_point_has_property(code_point, Property::InCB_Consonant)) {
			
 
				-                auto it_copy = it;
			
 
				-                skip_incb_extend_linker_sequence(it_copy);
			
 
				-
			
 
				-                if (it_copy != view.end() && code_point_has_property(*it_copy, Property::InCB_Linker)) {
			
 
				-                    ++it_copy;
			
 
				-                    skip_incb_extend_linker_sequence(it_copy);
			
 
				-
			
 
				-                    if (it_copy != view.end() && code_point_has_property(*it_copy, Property::InCB_Consonant)) {
			
 
				-                        next_code_point = *it_copy;
			
 
				-                        it = it_copy;
			
 
				-                        continue;
			
 
				-                    }
			
 
				-                }
			
 
				-            }
			
 
				-
			
 
				-            // GB11
			
 
				-            if (code_point_has_property(code_point, Property::Extended_Pictographic) && has_any_gbp(next_code_point, GBP::Extend, GBP::ZWJ)) {
			
 
				-                auto it_copy = it;
			
 
				-
			
 
				-                while (it_copy != view.end() && has_any_gbp(*it_copy, GBP::Extend))
			
 
				-                    ++it_copy;
			
 
				-
			
 
				-                if (it_copy != view.end() && has_any_gbp(*it_copy, GBP::ZWJ)) {
			
 
				-                    ++it_copy;
			
 
				-
			
 
				-                    if (it_copy != view.end() && code_point_has_property(*it_copy, Property::Extended_Pictographic)) {
			
 
				-                        next_code_point = *it_copy;
			
 
				-                        it = it_copy;
			
 
				-                        continue;
			
 
				-                    }
			
 
				-                }
			
 
				-            }
			
 
				-
			
 
				-            auto code_point_is_cr = has_any_gbp(code_point, GBP::CR);
			
 
				-            auto next_code_point_is_lf = has_any_gbp(next_code_point, GBP::LF);
			
 
				-
			
 
				-            // GB3
			
 
				-            if (code_point_is_cr && next_code_point_is_lf)
			
 
				-                continue;
			
 
				-            // GB4, GB5
			
 
				-            if (code_point_is_cr || next_code_point_is_lf || has_any_gbp(next_code_point, GBP::CR, GBP::Control) || has_any_gbp(code_point, GBP::LF, GBP::Control)) {
			
 
				-                if (callback(code_unit_offset_of(view, it)) == IterationDecision::Break)
			
 
				-                    return;
			
 
				-                continue;
			
 
				-            }
			
 
				-
			
 
				-            auto next_code_point_is_v = has_any_gbp(next_code_point, GBP::V);
			
 
				-            auto next_code_point_is_t = has_any_gbp(next_code_point, GBP::T);
			
 
				-
			
 
				-            // GB6
			
 
				-            if (has_any_gbp(code_point, GBP::L) && (next_code_point_is_v || has_any_gbp(next_code_point, GBP::L, GBP::LV, GBP::LVT)))
			
 
				-                continue;
			
 
				-            // GB7
			
 
				-            if ((next_code_point_is_v || next_code_point_is_t) && has_any_gbp(code_point, GBP::LV, GBP::V))
			
 
				-                continue;
			
 
				-            // GB8
			
 
				-            if (next_code_point_is_t && has_any_gbp(code_point, GBP::LVT, GBP::T))
			
 
				-                continue;
			
 
				-
			
 
				-            // GB9
			
 
				-            if (has_any_gbp(next_code_point, GBP::Extend, GBP::ZWJ))
			
 
				-                continue;
			
 
				-            // GB9a
			
 
				-            if (has_any_gbp(next_code_point, GBP::SpacingMark))
			
 
				-                continue;
			
 
				-            // GB9b
			
 
				-            if (has_any_gbp(code_point, GBP::Prepend))
			
 
				-                continue;
			
 
				-
			
 
				-            auto code_point_is_ri = has_any_gbp(code_point, GBP::Regional_Indicator);
			
 
				-            current_ri_chain = code_point_is_ri ? current_ri_chain + 1 : 0;
			
 
				-
			
 
				-            // GB12, GB13
			
 
				-            if (code_point_is_ri && has_any_gbp(next_code_point, GBP::Regional_Indicator) && current_ri_chain % 2 == 1)
			
 
				-                continue;
			
 
				-
			
 
				-            // GB999
			
 
				-            if (callback(code_unit_offset_of(view, it)) == IterationDecision::Break)
			
 
				-                return;
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    // GB2
			
 
				-    callback(code_unit_length(view));
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-void for_each_grapheme_segmentation_boundary(Utf8View const& view, SegmentationCallback callback)
			
 
				-{
			
 
				-    for_each_grapheme_segmentation_boundary_impl(view, move(callback));
			
 
				-}
			
 
				-
			
 
				-void for_each_grapheme_segmentation_boundary(Utf16View const& view, SegmentationCallback callback)
			
 
				-{
			
 
				-    for_each_grapheme_segmentation_boundary_impl(view, move(callback));
			
 
				-}
			
 
				-
			
 
				-void for_each_grapheme_segmentation_boundary(Utf32View const& view, SegmentationCallback callback)
			
 
				-{
			
 
				-    for_each_grapheme_segmentation_boundary_impl(view, move(callback));
			
 
				-}
			
 
				-
			
 
				-template<typename ViewType>
			
 
				-static void for_each_word_segmentation_boundary_impl([[maybe_unused]] ViewType const& view, [[maybe_unused]] SegmentationCallback callback)
			
 
				-{
			
 
				-#if ENABLE_UNICODE_DATA
			
 
				-    using WBP = WordBreakProperty;
			
 
				-
			
 
				-    // https://www.unicode.org/reports/tr29/#Word_Boundary_Rules
			
 
				-    if (view.is_empty())
			
 
				-        return;
			
 
				-
			
 
				-    auto has_any_wbp = [](u32 code_point, auto&&... properties) {
			
 
				-        return (code_point_has_word_break_property(code_point, properties) || ...);
			
 
				-    };
			
 
				-
			
 
				-    // WB1
			
 
				-    if (callback(0) == IterationDecision::Break)
			
 
				-        return;
			
 
				-
			
 
				-    if (code_unit_length(view) > 1) {
			
 
				-        auto it = view.begin();
			
 
				-        auto code_point = *it;
			
 
				-        u32 next_code_point;
			
 
				-        Optional<u32> previous_code_point;
			
 
				-        auto current_ri_chain = 0;
			
 
				-
			
 
				-        for (++it; it != view.end(); ++it, previous_code_point = code_point, code_point = next_code_point) {
			
 
				-            next_code_point = *it;
			
 
				-
			
 
				-            auto code_point_is_cr = has_any_wbp(code_point, WBP::CR);
			
 
				-            auto next_code_point_is_lf = has_any_wbp(next_code_point, WBP::LF);
			
 
				-
			
 
				-            // WB3
			
 
				-            if (code_point_is_cr && next_code_point_is_lf)
			
 
				-                continue;
			
 
				-            // WB3a, WB3b
			
 
				-            if (code_point_is_cr || next_code_point_is_lf || has_any_wbp(next_code_point, WBP::CR, WBP::Newline) || has_any_wbp(code_point, WBP::LF, WBP::Newline)) {
			
 
				-                if (callback(code_unit_offset_of(view, it)) == IterationDecision::Break)
			
 
				-                    return;
			
 
				-                continue;
			
 
				-            }
			
 
				-            // WB3c
			
 
				-            if (has_any_wbp(code_point, WBP::ZWJ) && code_point_has_property(next_code_point, Property::Extended_Pictographic))
			
 
				-                continue;
			
 
				-            // WB3d
			
 
				-            if (has_any_wbp(code_point, WBP::WSegSpace) && has_any_wbp(next_code_point, WBP::WSegSpace))
			
 
				-                continue;
			
 
				-
			
 
				-            // WB4
			
 
				-            if (has_any_wbp(next_code_point, WBP::Format, WBP::Extend, WBP::ZWJ))
			
 
				-                continue;
			
 
				-
			
 
				-            auto code_point_is_hebrew_letter = has_any_wbp(code_point, WBP::Hebrew_Letter);
			
 
				-            auto code_point_is_ah_letter = code_point_is_hebrew_letter || has_any_wbp(code_point, WBP::ALetter);
			
 
				-            auto next_code_point_is_hebrew_letter = has_any_wbp(next_code_point, WBP::Hebrew_Letter);
			
 
				-            auto next_code_point_is_ah_letter = next_code_point_is_hebrew_letter || has_any_wbp(next_code_point, WBP::ALetter);
			
 
				-
			
 
				-            // WB5
			
 
				-            if (code_point_is_ah_letter && next_code_point_is_ah_letter)
			
 
				-                continue;
			
 
				-
			
 
				-            Optional<u32> next_next_code_point;
			
 
				-            if (it != view.end()) {
			
 
				-                auto it_copy = it;
			
 
				-                ++it_copy;
			
 
				-                if (it_copy != view.end())
			
 
				-                    next_next_code_point = *it_copy;
			
 
				-            }
			
 
				-            bool next_next_code_point_is_hebrew_letter = next_next_code_point.has_value() && has_any_wbp(*next_next_code_point, WBP::Hebrew_Letter);
			
 
				-            bool next_next_code_point_is_ah_letter = next_next_code_point_is_hebrew_letter || (next_next_code_point.has_value() && has_any_wbp(*next_next_code_point, WBP::ALetter));
			
 
				-
			
 
				-            auto next_code_point_is_mid_num_let_q = has_any_wbp(next_code_point, WBP::MidNumLet, WBP::Single_Quote);
			
 
				-
			
 
				-            // WB6
			
 
				-            if (code_point_is_ah_letter && next_next_code_point_is_ah_letter && (next_code_point_is_mid_num_let_q || has_any_wbp(next_code_point, WBP::MidLetter)))
			
 
				-                continue;
			
 
				-
			
 
				-            auto code_point_is_mid_num_let_q = has_any_wbp(code_point, WBP::MidNumLet, WBP::Single_Quote);
			
 
				-            auto previous_code_point_is_hebrew_letter = previous_code_point.has_value() && has_any_wbp(*previous_code_point, WBP::Hebrew_Letter);
			
 
				-            auto previous_code_point_is_ah_letter = previous_code_point_is_hebrew_letter || (previous_code_point.has_value() && has_any_wbp(*previous_code_point, WBP::ALetter));
			
 
				-
			
 
				-            // WB7
			
 
				-            if (previous_code_point_is_ah_letter && next_code_point_is_ah_letter && (code_point_is_mid_num_let_q || has_any_wbp(code_point, WBP::MidLetter)))
			
 
				-                continue;
			
 
				-            // WB7a
			
 
				-            if (code_point_is_hebrew_letter && has_any_wbp(next_code_point, WBP::Single_Quote))
			
 
				-                continue;
			
 
				-            // WB7b
			
 
				-            if (code_point_is_hebrew_letter && next_next_code_point_is_hebrew_letter && has_any_wbp(next_code_point, WBP::Double_Quote))
			
 
				-                continue;
			
 
				-            // WB7c
			
 
				-            if (previous_code_point_is_hebrew_letter && next_code_point_is_hebrew_letter && has_any_wbp(code_point, WBP::Double_Quote))
			
 
				-                continue;
			
 
				-
			
 
				-            auto code_point_is_numeric = has_any_wbp(code_point, WBP::Numeric);
			
 
				-            auto next_code_point_is_numeric = has_any_wbp(next_code_point, WBP::Numeric);
			
 
				-
			
 
				-            // WB8
			
 
				-            if (code_point_is_numeric && next_code_point_is_numeric)
			
 
				-                continue;
			
 
				-            // WB9
			
 
				-            if (code_point_is_ah_letter && next_code_point_is_numeric)
			
 
				-                continue;
			
 
				-            // WB10
			
 
				-            if (code_point_is_numeric && next_code_point_is_ah_letter)
			
 
				-                continue;
			
 
				-
			
 
				-            auto previous_code_point_is_numeric = previous_code_point.has_value() && has_any_wbp(*previous_code_point, WBP::Numeric);
			
 
				-
			
 
				-            // WB11
			
 
				-            if (previous_code_point_is_numeric && next_code_point_is_numeric && (code_point_is_mid_num_let_q || has_any_wbp(code_point, WBP::MidNum)))
			
 
				-                continue;
			
 
				-
			
 
				-            bool next_next_code_point_is_numeric = next_next_code_point.has_value() && has_any_wbp(*next_next_code_point, WBP::Numeric);
			
 
				-
			
 
				-            // WB12
			
 
				-            if (code_point_is_numeric && next_next_code_point_is_numeric && (next_code_point_is_mid_num_let_q || has_any_wbp(next_code_point, WBP::MidNum)))
			
 
				-                continue;
			
 
				-
			
 
				-            auto code_point_is_katakana = has_any_wbp(code_point, WBP::Katakana);
			
 
				-            auto next_code_point_is_katakana = has_any_wbp(next_code_point, WBP::Katakana);
			
 
				-
			
 
				-            // WB13
			
 
				-            if (code_point_is_katakana && next_code_point_is_katakana)
			
 
				-                continue;
			
 
				-
			
 
				-            auto code_point_is_extend_num_let = has_any_wbp(code_point, WBP::ExtendNumLet);
			
 
				-
			
 
				-            // WB13a
			
 
				-            if ((code_point_is_ah_letter || code_point_is_numeric || code_point_is_katakana || code_point_is_extend_num_let) && has_any_wbp(next_code_point, WBP::ExtendNumLet))
			
 
				-                continue;
			
 
				-            // WB13b
			
 
				-            if (code_point_is_extend_num_let && (next_code_point_is_ah_letter || next_code_point_is_numeric || next_code_point_is_katakana))
			
 
				-                continue;
			
 
				-
			
 
				-            auto code_point_is_ri = has_any_wbp(code_point, WBP::Regional_Indicator);
			
 
				-            current_ri_chain = code_point_is_ri ? current_ri_chain + 1 : 0;
			
 
				-
			
 
				-            // WB15, WB16
			
 
				-            if (code_point_is_ri && has_any_wbp(next_code_point, WBP::Regional_Indicator) && current_ri_chain % 2 == 1)
			
 
				-                continue;
			
 
				-
			
 
				-            // WB999
			
 
				-            if (callback(code_unit_offset_of(view, it)) == IterationDecision::Break)
			
 
				-                return;
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    // WB2
			
 
				-    callback(code_unit_length(view));
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-void for_each_word_segmentation_boundary(Utf8View const& view, SegmentationCallback callback)
			
 
				-{
			
 
				-    for_each_word_segmentation_boundary_impl(view, move(callback));
			
 
				-}
			
 
				-
			
 
				-void for_each_word_segmentation_boundary(Utf16View const& view, SegmentationCallback callback)
			
 
				-{
			
 
				-    for_each_word_segmentation_boundary_impl(view, move(callback));
			
 
				-}
			
 
				-
			
 
				-void for_each_word_segmentation_boundary(Utf32View const& view, SegmentationCallback callback)
			
 
				-{
			
 
				-    for_each_word_segmentation_boundary_impl(view, move(callback));
			
 
				-}
			
 
				-
			
 
				-template<typename ViewType>
			
 
				-static void for_each_sentence_segmentation_boundary_impl([[maybe_unused]] ViewType const& view, [[maybe_unused]] SegmentationCallback callback)
			
 
				-{
			
 
				-#if ENABLE_UNICODE_DATA
			
 
				-    using SBP = SentenceBreakProperty;
			
 
				-
			
 
				-    // https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
			
 
				-    if (view.is_empty())
			
 
				-        return;
			
 
				-
			
 
				-    auto has_any_sbp = [](u32 code_point, auto&&... properties) {
			
 
				-        return (code_point_has_sentence_break_property(code_point, properties) || ...);
			
 
				-    };
			
 
				-
			
 
				-    // SB1
			
 
				-    if (callback(0) == IterationDecision::Break)
			
 
				-        return;
			
 
				-
			
 
				-    if (code_unit_length(view) > 1) {
			
 
				-        auto it = view.begin();
			
 
				-        auto code_point = *it;
			
 
				-        u32 next_code_point;
			
 
				-        Optional<u32> previous_code_point;
			
 
				-        enum class TerminatorSequenceState {
			
 
				-            None,
			
 
				-            Term,
			
 
				-            Close,
			
 
				-            Sp
			
 
				-        } terminator_sequence_state { TerminatorSequenceState::None };
			
 
				-        auto term_was_a_term = false;
			
 
				-
			
 
				-        for (++it; it != view.end(); ++it, previous_code_point = code_point, code_point = next_code_point) {
			
 
				-            next_code_point = *it;
			
 
				-
			
 
				-            auto code_point_is_cr = has_any_sbp(code_point, SBP::CR);
			
 
				-            auto next_code_point_is_lf = has_any_sbp(next_code_point, SBP::LF);
			
 
				-
			
 
				-            // SB3
			
 
				-            if (code_point_is_cr && next_code_point_is_lf)
			
 
				-                continue;
			
 
				-
			
 
				-            auto code_point_is_para_sep = code_point_is_cr || has_any_sbp(code_point, SBP::LF, SBP::Sep);
			
 
				-
			
 
				-            // SB4
			
 
				-            if (code_point_is_para_sep) {
			
 
				-                if (callback(code_unit_offset_of(view, it)) == IterationDecision::Break)
			
 
				-                    return;
			
 
				-                continue;
			
 
				-            }
			
 
				-
			
 
				-            // SB5
			
 
				-            if (has_any_sbp(next_code_point, SBP::Format, SBP::Extend))
			
 
				-                continue;
			
 
				-
			
 
				-            auto code_point_is_a_term = has_any_sbp(code_point, SBP::ATerm);
			
 
				-
			
 
				-            // SB6
			
 
				-            if (code_point_is_a_term && has_any_sbp(next_code_point, SBP::Numeric))
			
 
				-                continue;
			
 
				-            // SB7
			
 
				-            if (code_point_is_a_term && previous_code_point.has_value() && has_any_sbp(*previous_code_point, SBP::Upper, SBP::Lower) && has_any_sbp(next_code_point, SBP::Upper))
			
 
				-                continue;
			
 
				-
			
 
				-            if (code_point_is_a_term || has_any_sbp(code_point, SBP::STerm)) {
			
 
				-                terminator_sequence_state = TerminatorSequenceState::Term;
			
 
				-                term_was_a_term = code_point_is_a_term;
			
 
				-            } else if (terminator_sequence_state >= TerminatorSequenceState::Term && terminator_sequence_state <= TerminatorSequenceState::Close && has_any_sbp(code_point, SBP::Close)) {
			
 
				-                terminator_sequence_state = TerminatorSequenceState::Close;
			
 
				-            } else if (terminator_sequence_state >= TerminatorSequenceState::Term && has_any_sbp(code_point, SBP::Sp)) {
			
 
				-                terminator_sequence_state = TerminatorSequenceState::Sp;
			
 
				-            } else {
			
 
				-                terminator_sequence_state = TerminatorSequenceState::None;
			
 
				-            }
			
 
				-
			
 
				-            // SB8
			
 
				-            if (terminator_sequence_state >= TerminatorSequenceState::Term && term_was_a_term) {
			
 
				-                auto it_copy = it;
			
 
				-                bool illegal_sequence = false;
			
 
				-                for (auto sequence_code_point = *it_copy; it_copy != view.end(); ++it_copy) {
			
 
				-                    if (has_any_sbp(sequence_code_point, SBP::Close, SBP::SContinue, SBP::Numeric, SBP::Sp, SBP::Format, SBP::Extend))
			
 
				-                        continue;
			
 
				-                    illegal_sequence = has_any_sbp(sequence_code_point, SBP::Lower);
			
 
				-                }
			
 
				-                if (illegal_sequence)
			
 
				-                    continue;
			
 
				-            }
			
 
				-
			
 
				-            // SB8a
			
 
				-            if (terminator_sequence_state >= TerminatorSequenceState::Term && (has_any_sbp(next_code_point, SBP::SContinue, SBP::STerm, SBP::ATerm)))
			
 
				-                continue;
			
 
				-
			
 
				-            auto next_code_point_is_sp = has_any_sbp(next_code_point, SBP::Sp);
			
 
				-            auto next_code_point_is_para_sep = has_any_sbp(next_code_point, SBP::Sep, SBP::CR, SBP::LF);
			
 
				-
			
 
				-            // SB9
			
 
				-            if (terminator_sequence_state >= TerminatorSequenceState::Term && terminator_sequence_state <= TerminatorSequenceState::Close && (next_code_point_is_sp || next_code_point_is_para_sep || has_any_sbp(next_code_point, SBP::Close)))
			
 
				-                continue;
			
 
				-
			
 
				-            // SB10
			
 
				-            if (terminator_sequence_state >= TerminatorSequenceState::Term && (next_code_point_is_sp || next_code_point_is_para_sep))
			
 
				-                continue;
			
 
				-
			
 
				-            // SB11
			
 
				-            if (terminator_sequence_state >= TerminatorSequenceState::Term)
			
 
				-                if (callback(code_unit_offset_of(view, it)) == IterationDecision::Break)
			
 
				-                    return;
			
 
				-
			
 
				-            // SB998
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    // SB2
			
 
				-    callback(code_unit_length(view));
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-void for_each_sentence_segmentation_boundary(Utf8View const& view, SegmentationCallback callback)
			
 
				-{
			
 
				-    for_each_sentence_segmentation_boundary_impl(view, move(callback));
			
 
				-}
			
 
				-
			
 
				-void for_each_sentence_segmentation_boundary(Utf16View const& view, SegmentationCallback callback)
			
 
				-{
			
 
				-    for_each_sentence_segmentation_boundary_impl(view, move(callback));
			
 
				-}
			
 
				-
			
 
				-void for_each_sentence_segmentation_boundary(Utf32View const& view, SegmentationCallback callback)
			
 
				-{
			
 
				-    for_each_sentence_segmentation_boundary_impl(view, move(callback));
			
 
				-}
			
 
				-
			
 
				-}
			
--- a/Userland/Libraries/LibUnicode/Segmentation.h
+++ b/Userland/Libraries/LibUnicode/Segmentation.h
@@ -1,134 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
			
 
				- * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
			
 
				- *
			
 
				- * SPDX-License-Identifier: BSD-2-Clause
			
 
				- */
			
 
				-
			
 
				-#pragma once
			
 
				-
			
 
				-#include <AK/Forward.h>
			
 
				-#include <AK/Function.h>
			
 
				-#include <AK/IterationDecision.h>
			
 
				-#include <AK/Optional.h>
			
 
				-#include <AK/Types.h>
			
 
				-
			
 
				-namespace Unicode {
			
 
				-
			
 
				-using SegmentationCallback = Function<IterationDecision(size_t)>;
			
 
				-
			
 
				-void for_each_grapheme_segmentation_boundary(Utf8View const&, SegmentationCallback);
			
 
				-void for_each_grapheme_segmentation_boundary(Utf16View const&, SegmentationCallback);
			
 
				-void for_each_grapheme_segmentation_boundary(Utf32View const&, SegmentationCallback);
			
 
				-
			
 
				-template<typename ViewType>
			
 
				-Optional<size_t> next_grapheme_segmentation_boundary(ViewType const& view, size_t index)
			
 
				-{
			
 
				-    Optional<size_t> result;
			
 
				-
			
 
				-    for_each_grapheme_segmentation_boundary(view, [&](auto boundary) {
			
 
				-        if (boundary > index) {
			
 
				-            result = boundary;
			
 
				-            return IterationDecision::Break;
			
 
				-        }
			
 
				-
			
 
				-        return IterationDecision::Continue;
			
 
				-    });
			
 
				-
			
 
				-    return result;
			
 
				-}
			
 
				-
			
 
				-template<typename ViewType>
			
 
				-Optional<size_t> previous_grapheme_segmentation_boundary(ViewType const& view, size_t index)
			
 
				-{
			
 
				-    Optional<size_t> result;
			
 
				-
			
 
				-    for_each_grapheme_segmentation_boundary(view, [&](auto boundary) {
			
 
				-        if (boundary < index) {
			
 
				-            result = boundary;
			
 
				-            return IterationDecision::Continue;
			
 
				-        }
			
 
				-
			
 
				-        return IterationDecision::Break;
			
 
				-    });
			
 
				-
			
 
				-    return result;
			
 
				-}
			
 
				-
			
 
				-void for_each_word_segmentation_boundary(Utf8View const&, SegmentationCallback);
			
 
				-void for_each_word_segmentation_boundary(Utf16View const&, SegmentationCallback);
			
 
				-void for_each_word_segmentation_boundary(Utf32View const&, SegmentationCallback);
			
 
				-
			
 
				-template<typename ViewType>
			
 
				-Optional<size_t> next_word_segmentation_boundary(ViewType const& view, size_t index)
			
 
				-{
			
 
				-    Optional<size_t> result;
			
 
				-
			
 
				-    for_each_word_segmentation_boundary(view, [&](auto boundary) {
			
 
				-        if (boundary > index) {
			
 
				-            result = boundary;
			
 
				-            return IterationDecision::Break;
			
 
				-        }
			
 
				-
			
 
				-        return IterationDecision::Continue;
			
 
				-    });
			
 
				-
			
 
				-    return result;
			
 
				-}
			
 
				-
			
 
				-template<typename ViewType>
			
 
				-Optional<size_t> previous_word_segmentation_boundary(ViewType const& view, size_t index)
			
 
				-{
			
 
				-    Optional<size_t> result;
			
 
				-
			
 
				-    for_each_word_segmentation_boundary(view, [&](auto boundary) {
			
 
				-        if (boundary < index) {
			
 
				-            result = boundary;
			
 
				-            return IterationDecision::Continue;
			
 
				-        }
			
 
				-
			
 
				-        return IterationDecision::Break;
			
 
				-    });
			
 
				-
			
 
				-    return result;
			
 
				-}
			
 
				-
			
 
				-void for_each_sentence_segmentation_boundary(Utf8View const&, SegmentationCallback);
			
 
				-void for_each_sentence_segmentation_boundary(Utf16View const&, SegmentationCallback);
			
 
				-void for_each_sentence_segmentation_boundary(Utf32View const&, SegmentationCallback);
			
 
				-
			
 
				-template<typename ViewType>
			
 
				-Optional<size_t> next_sentence_segmentation_boundary(ViewType const& view, size_t index)
			
 
				-{
			
 
				-    Optional<size_t> result;
			
 
				-
			
 
				-    for_each_sentence_segmentation_boundary(view, [&](auto boundary) {
			
 
				-        if (boundary > index) {
			
 
				-            result = boundary;
			
 
				-            return IterationDecision::Break;
			
 
				-        }
			
 
				-
			
 
				-        return IterationDecision::Continue;
			
 
				-    });
			
 
				-
			
 
				-    return result;
			
 
				-}
			
 
				-
			
 
				-template<typename ViewType>
			
 
				-Optional<size_t> previous_sentence_segmentation_boundary(ViewType const& view, size_t index)
			
 
				-{
			
 
				-    Optional<size_t> result;
			
 
				-
			
 
				-    for_each_sentence_segmentation_boundary(view, [&](auto boundary) {
			
 
				-        if (boundary < index) {
			
 
				-            result = boundary;
			
 
				-            return IterationDecision::Continue;
			
 
				-        }
			
 
				-
			
 
				-        return IterationDecision::Break;
			
 
				-    });
			
 
				-
			
 
				-    return result;
			
 
				-}
			
 
				-
			
 
				-}