From d50724956e3e9fec55fb9528cf129f00e9ce5843 Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Sun, 22 Jan 2023 09:24:12 -0500
Subject: [PATCH] AK: Add a method to find the byte offset of a code point

---
Reviewer notes (this section is ignored when the patch is applied):
* String::find_byte_offset(code_point, from_byte_offset) returns the byte
  offset of the first code point equal to `code_point` located at or after
  `from_byte_offset`. It returns an empty Optional when `from_byte_offset`
  is at or past the string's byte length, or when no match is found.
* This copy was recovered from a whitespace-collapsed extraction. Line
  breaks and 4-space indentation were rebuilt, and the stripped template
  arguments (`Optional<size_t>`, `ErrorOr<Vector<String>>`) were
  reconstructed -- verify the context lines against the tree before applying.
* NOTE(review): the author e-mail address was not recoverable from the
  extraction; restore it on the "From:" line before using `git am`.

 AK/String.cpp           | 14 ++++++++++++++
 AK/String.h             |  2 ++
 Tests/AK/TestString.cpp | 42 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+)

diff --git a/AK/String.cpp b/AK/String.cpp
index 828e165eed0..8aa3d7dbcd0 100644
--- a/AK/String.cpp
+++ b/AK/String.cpp
@@ -275,6 +275,20 @@ ErrorOr<Vector<String>> String::split_limit(u32 separator, size_t limit, SplitBe
     return result;
 }
 
+Optional<size_t> String::find_byte_offset(u32 code_point, size_t from_byte_offset) const
+{
+    auto code_points = this->code_points();
+    if (from_byte_offset >= code_points.byte_length())
+        return {};
+
+    for (auto it = code_points.iterator_at_byte_offset(from_byte_offset); it != code_points.end(); ++it) {
+        if (*it == code_point)
+            return code_points.byte_offset_of(it);
+    }
+
+    return {};
+}
+
 bool String::operator==(String const& other) const
 {
     if (is_short_string())
diff --git a/AK/String.h b/AK/String.h
index cf34bcac503..1a7c01b1a97 100644
--- a/AK/String.h
+++ b/AK/String.h
@@ -127,6 +127,8 @@ public:
     ErrorOr<Vector<String>> split_limit(u32 separator, size_t limit, SplitBehavior = SplitBehavior::Nothing) const;
     ErrorOr<Vector<String>> split(u32 separator, SplitBehavior = SplitBehavior::Nothing) const;
 
+    Optional<size_t> find_byte_offset(u32 code_point, size_t from_byte_offset = 0) const;
+
     [[nodiscard]] bool operator==(String const&) const;
     [[nodiscard]] bool operator!=(String const& other) const { return !(*this == other); }
 
diff --git a/Tests/AK/TestString.cpp b/Tests/AK/TestString.cpp
index f90b7bf393d..872a9446678 100644
--- a/Tests/AK/TestString.cpp
+++ b/Tests/AK/TestString.cpp
@@ -315,3 +315,45 @@ TEST_CASE(split)
         EXPECT_EQ(parts[2], "ω"sv);
     }
 }
+
+TEST_CASE(find_byte_offset)
+{
+    {
+        String string {};
+        auto index = string.find_byte_offset(0);
+        EXPECT(!index.has_value());
+    }
+    {
+        auto string = MUST(String::from_utf8("foo"sv));
+
+        auto index1 = string.find_byte_offset('f');
+        EXPECT_EQ(index1, 0u);
+
+        auto index2 = string.find_byte_offset('o');
+        EXPECT_EQ(index2, 1u);
+
+        auto index3 = string.find_byte_offset('o', *index2 + 1);
+        EXPECT_EQ(index3, 2u);
+
+        auto index4 = string.find_byte_offset('b');
+        EXPECT(!index4.has_value());
+    }
+    {
+        auto string = MUST(String::from_utf8("ωΣωΣω"sv));
+
+        auto index1 = string.find_byte_offset(0x03C9U);
+        EXPECT_EQ(index1, 0u);
+
+        auto index2 = string.find_byte_offset(0x03A3u);
+        EXPECT_EQ(index2, 2u);
+
+        auto index3 = string.find_byte_offset(0x03C9U, 2);
+        EXPECT_EQ(index3, 4u);
+
+        auto index4 = string.find_byte_offset(0x03A3u, 4);
+        EXPECT_EQ(index4, 6u);
+
+        auto index5 = string.find_byte_offset(0x03C9U, 6);
+        EXPECT_EQ(index5, 8u);
+    }
+}