From 3dccaa39d8e1830ed2c0994b9ef07e6881d0865d Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Mon, 10 Jan 2022 11:47:23 -0500
Subject: [PATCH] AK: Define a traits helper for case-insensitive StringView hashing

Currently, we define a CaseInsensitiveStringTraits structure for String.
Using this structure for StringView involves allocating a String from
that view, and a second string to convert that intermediate string to
lowercase.

This defines CaseInsensitiveStringViewTraits (and the underlying helper
case_insensitive_string_hash) to avoid allocations.
---
 AK/StringHash.h             | 21 +++++++++++++++++++++
 AK/StringView.h             | 10 ++++++++++
 Tests/AK/TestStringView.cpp | 12 ++++++++++++
 3 files changed, 43 insertions(+)

diff --git a/AK/StringHash.h b/AK/StringHash.h
index 1a166ae0409..6d5774af18d 100644
--- a/AK/StringHash.h
+++ b/AK/StringHash.h
@@ -24,6 +24,27 @@ constexpr u32 string_hash(char const* characters, size_t length, u32 seed = 0)
     return hash;
 }
 
+constexpr u32 case_insensitive_string_hash(char const* characters, size_t length, u32 seed = 0)
+{
+    // AK/CharacterTypes.h cannot be included from here.
+    auto to_lowercase = [](char ch) -> u32 {
+        if (ch >= 'A' && ch <= 'Z')
+            return static_cast<u32>(ch) + 0x20;
+        return static_cast<u32>(ch);
+    };
+
+    u32 hash = seed;
+    for (size_t i = 0; i < length; ++i) {
+        hash += to_lowercase(characters[i]);
+        hash += (hash << 10);
+        hash ^= (hash >> 6);
+    }
+    hash += hash << 3;
+    hash ^= hash >> 11;
+    hash += hash << 15;
+    return hash;
+}
+
 }
 
 using AK::string_hash;
diff --git a/AK/StringView.h b/AK/StringView.h
index 7906ccf6f44..86f5747096e 100644
--- a/AK/StringView.h
+++ b/AK/StringView.h
@@ -221,6 +221,15 @@ struct Traits<StringView> : public GenericTraits<StringView> {
     static unsigned hash(StringView s) { return s.hash(); }
 };
 
+struct CaseInsensitiveStringViewTraits : public Traits<StringView> {
+    static unsigned hash(StringView s)
+    {
+        if (s.is_empty())
+            return 0;
+        return case_insensitive_string_hash(s.characters_without_null_termination(), s.length());
+    }
+};
+
 }
 
 [[nodiscard]] ALWAYS_INLINE constexpr AK::StringView operator"" sv(const char* cstring, size_t length)
@@ -228,4 +237,5 @@ struct Traits<StringView> : public GenericTraits<StringView> {
     return AK::StringView(cstring, length);
 }
 
+using AK::CaseInsensitiveStringViewTraits;
 using AK::StringView;
diff --git a/Tests/AK/TestStringView.cpp b/Tests/AK/TestStringView.cpp
index 7b1d6435dc5..c2c81aa5e69 100644
--- a/Tests/AK/TestStringView.cpp
+++ b/Tests/AK/TestStringView.cpp
@@ -189,3 +189,15 @@ TEST_CASE(constexpr_stuff)
     }
 #undef do_test
 }
+
+TEST_CASE(case_insensitive_hash)
+{
+    auto string1 = "abcdef"sv;
+    auto string2 = "ABCDEF"sv;
+    auto string3 = "aBcDeF"sv;
+    auto string4 = "foo"sv;
+
+    EXPECT_EQ(CaseInsensitiveStringViewTraits::hash(string1), CaseInsensitiveStringViewTraits::hash(string2));
+    EXPECT_EQ(CaseInsensitiveStringViewTraits::hash(string1), CaseInsensitiveStringViewTraits::hash(string3));
+    EXPECT_NE(CaseInsensitiveStringViewTraits::hash(string1), CaseInsensitiveStringViewTraits::hash(string4));
+}