LibUnicode: Update out-of-date spec links

Also remove links that add little value and tend to go out of date
(i.e. links to UCD files, which are all already listed in
unicode_data.cmake).
This commit is contained in:
Timothy Flynn 2023-01-17 12:12:29 -05:00 committed by Linus Groh
parent 142abc0b2e
commit 8d9fb898d7
Notes: sideshowbarker 2024-07-17 05:00:08 +09:00
3 changed files with 10 additions and 14 deletions

View file

@ -30,8 +30,7 @@ struct CodePointRange {
u32 last;
};
// SpecialCasing source: https://www.unicode.org/Public/13.0.0/ucd/SpecialCasing.txt
// Field descriptions: https://www.unicode.org/reports/tr44/tr44-13.html#SpecialCasing.txt
// https://www.unicode.org/reports/tr44/#SpecialCasing.txt
struct SpecialCasing {
u32 index { 0 };
u32 code_point { 0 };
@ -42,7 +41,7 @@ struct SpecialCasing {
DeprecatedString condition;
};
// Field descriptions: https://www.unicode.org/reports/tr44/#Character_Decomposition_Mappings
// https://www.unicode.org/reports/tr44/#Character_Decomposition_Mappings
struct CodePointDecomposition {
// `tag` is a string since it's used for codegen as an enum value.
DeprecatedString tag { "Canonical"sv };
@ -50,12 +49,10 @@ struct CodePointDecomposition {
size_t decomposition_size { 0 };
};
// PropList source: https://www.unicode.org/Public/13.0.0/ucd/PropList.txt
// Property descriptions: https://www.unicode.org/reports/tr44/tr44-13.html#PropList.txt
// https://www.unicode.org/reports/tr44/#PropList.txt
// Maps a string key to the list of code point ranges associated with it.
// NOTE(review): the key is presumably a Unicode property name - confirm against the code that fills this map.
using PropList = HashMap<DeprecatedString, Vector<CodePointRange>>;
// Normalization source: https://www.unicode.org/Public/13.0.0/ucd/DerivedNormalizationProps.txt
// Normalization descriptions: https://www.unicode.org/reports/tr44/#DerivedNormalizationProps.txt
// https://www.unicode.org/reports/tr44/#DerivedNormalizationProps.txt
enum class QuickCheck {
Yes,
No,
@ -75,9 +72,7 @@ struct CodePointName {
size_t name { 0 };
};
// UnicodeData source: https://www.unicode.org/Public/13.0.0/ucd/UnicodeData.txt
// Field descriptions: https://www.unicode.org/reports/tr44/tr44-13.html#UnicodeData.txt
// https://www.unicode.org/reports/tr44/#General_Category_Values
// https://www.unicode.org/reports/tr44/#UnicodeData.txt
struct CodePointData {
u32 code_point { 0 };
DeprecatedString name;
@ -128,6 +123,7 @@ struct UnicodeData {
HashMap<u32, size_t> code_point_display_name_aliases;
Vector<CodePointName> code_point_display_names;
// https://www.unicode.org/reports/tr44/#General_Category_Values
PropList general_categories;
Vector<Alias> general_category_aliases;

View file

@ -17,9 +17,6 @@
# include <LibUnicode/UnicodeData.h>
#endif
// For details on the algorithms used here, see Section 3.13 Default Case Algorithms
// https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf
namespace Unicode {
Optional<DeprecatedString> __attribute__((weak)) code_point_display_name(u32) { return {}; }

View file

@ -16,7 +16,7 @@
#endif
// For details on the algorithms used here, see Section 3.13 Default Case Algorithms
// https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf
namespace Unicode::Detail {
@ -197,6 +197,7 @@ static SpecialCasing const* find_matching_special_case(u32 code_point, Utf8View
#endif
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
ErrorOr<void> build_lowercase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale)
{
#if ENABLE_UNICODE_DATA
@ -223,6 +224,7 @@ ErrorOr<void> build_lowercase_string([[maybe_unused]] Utf8View code_points, [[ma
#endif
}
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
ErrorOr<void> build_uppercase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale)
{
#if ENABLE_UNICODE_DATA
@ -249,6 +251,7 @@ ErrorOr<void> build_uppercase_string([[maybe_unused]] Utf8View code_points, [[ma
#endif
}
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34078
ErrorOr<void> build_titlecase_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder, [[maybe_unused]] Optional<StringView> const& locale)
{
#if ENABLE_UNICODE_DATA