mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
AK+LibUnicode: Provide Unicode-aware caseless String matching
The Unicode spec defines much more complicated caseless matching algorithms in its Collation spec. This implements the "basic" case folding comparison.
This commit is contained in:
parent
8f2589b3b0
commit
537fcaf59e
Notes:
sideshowbarker
2024-07-17 20:58:35 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/537fcaf59e Pull-request: https://github.com/SerenityOS/serenity/pull/17060 Reviewed-by: https://github.com/linusg ✅
3 changed files with 79 additions and 0 deletions
|
@ -49,6 +49,10 @@ public:
|
|||
ErrorOr<String> to_lowercase(Optional<StringView> const& locale = {}) const;
|
||||
ErrorOr<String> to_uppercase(Optional<StringView> const& locale = {}) const;
|
||||
ErrorOr<String> to_titlecase(Optional<StringView> const& locale = {}) const;
|
||||
// Creates a case-folded copy of this String. Unlike the case conversions above, this takes no locale.
// equals_ignoring_case() compares the case-folded forms of both strings.
ErrorOr<String> to_casefold() const;
|
||||
|
||||
// Compare this String against another string with caseless matching. Using this method requires linking LibUnicode into your application.
|
||||
ErrorOr<bool> equals_ignoring_case(String const&) const;
|
||||
|
||||
// Creates a substring with a deep copy of the specified data window.
|
||||
ErrorOr<String> substring_from_byte_offset(size_t start, size_t byte_count) const;
|
||||
|
|
|
@ -187,6 +187,66 @@ TEST_CASE(to_titlecase)
|
|||
}
|
||||
}
|
||||
|
||||
TEST_CASE(equals_ignoring_case)
{
    // Two default-constructed (empty) strings must be a caseless match.
    {
        String empty_lhs {};
        String empty_rhs {};

        EXPECT(MUST(empty_lhs.equals_ignoring_case(empty_rhs)));
    }

    // ASCII letters: every casing of the same letters matches; a different letter order does not.
    {
        auto lowercase = MUST(String::from_utf8("abcd"sv));
        auto uppercase = MUST(String::from_utf8("ABCD"sv));
        auto mixed_case = MUST(String::from_utf8("AbCd"sv));
        auto reversed = MUST(String::from_utf8("dcba"sv));

        EXPECT(MUST(lowercase.equals_ignoring_case(uppercase)));
        EXPECT(MUST(lowercase.equals_ignoring_case(mixed_case)));
        EXPECT(!MUST(lowercase.equals_ignoring_case(reversed)));

        EXPECT(MUST(uppercase.equals_ignoring_case(lowercase)));
        EXPECT(MUST(uppercase.equals_ignoring_case(mixed_case)));
        EXPECT(!MUST(uppercase.equals_ignoring_case(reversed)));

        EXPECT(MUST(mixed_case.equals_ignoring_case(lowercase)));
        EXPECT(MUST(mixed_case.equals_ignoring_case(uppercase)));
        EXPECT(!MUST(mixed_case.equals_ignoring_case(reversed)));
    }

    // Sharp S is expected to match every casing of the two-letter sequence "ss",
    // but never a single "s", regardless of which side of the comparison it is on.
    {
        auto sharp_s = MUST(String::from_utf8("\u00DF"sv)); // LATIN SMALL LETTER SHARP S
        auto double_upper = MUST(String::from_utf8("SS"sv));
        auto double_title = MUST(String::from_utf8("Ss"sv));
        auto double_lower = MUST(String::from_utf8("ss"sv));
        auto single_upper = MUST(String::from_utf8("S"sv));
        auto single_lower = MUST(String::from_utf8("s"sv));

        EXPECT(MUST(sharp_s.equals_ignoring_case(double_upper)));
        EXPECT(MUST(sharp_s.equals_ignoring_case(double_title)));
        EXPECT(MUST(sharp_s.equals_ignoring_case(double_lower)));
        EXPECT(!MUST(sharp_s.equals_ignoring_case(single_upper)));
        EXPECT(!MUST(sharp_s.equals_ignoring_case(single_lower)));

        EXPECT(MUST(double_upper.equals_ignoring_case(sharp_s)));
        EXPECT(MUST(double_upper.equals_ignoring_case(double_title)));
        EXPECT(MUST(double_upper.equals_ignoring_case(double_lower)));
        EXPECT(!MUST(double_upper.equals_ignoring_case(single_upper)));
        EXPECT(!MUST(double_upper.equals_ignoring_case(single_lower)));

        EXPECT(MUST(double_title.equals_ignoring_case(sharp_s)));
        EXPECT(MUST(double_title.equals_ignoring_case(double_upper)));
        EXPECT(MUST(double_title.equals_ignoring_case(double_lower)));
        EXPECT(!MUST(double_title.equals_ignoring_case(single_upper)));
        EXPECT(!MUST(double_title.equals_ignoring_case(single_lower)));

        EXPECT(MUST(double_lower.equals_ignoring_case(sharp_s)));
        EXPECT(MUST(double_lower.equals_ignoring_case(double_upper)));
        EXPECT(MUST(double_lower.equals_ignoring_case(double_title)));
        EXPECT(!MUST(double_lower.equals_ignoring_case(single_upper)));
        EXPECT(!MUST(double_lower.equals_ignoring_case(single_lower)));
    }
}
|
||||
|
||||
TEST_CASE(is_one_of)
|
||||
{
|
||||
auto foo = MUST(String::from_utf8("foo"sv));
|
||||
|
|
|
@ -33,4 +33,19 @@ ErrorOr<String> String::to_titlecase(Optional<StringView> const& locale) const
|
|||
return builder.to_string();
|
||||
}
|
||||
|
||||
// Builds the case-folded form of this String by running its code points through
// Unicode::Detail::build_casefold_string; any error from that step is propagated to the caller.
ErrorOr<String> String::to_casefold() const
{
    StringBuilder casefolded;
    TRY(Unicode::Detail::build_casefold_string(code_points(), casefolded));

    return casefolded.to_string();
}
|
||||
|
||||
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34145
|
||||
ErrorOr<bool> String::equals_ignoring_case(String const& other) const
|
||||
{
|
||||
// A string X is a caseless match for a string Y if and only if:
|
||||
// toCasefold(X) = toCasefold(Y)
|
||||
return TRY(to_casefold()) == TRY(other.to_casefold());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue