From aec2dadfddf106fa06c10814ae6f2d0d5a56ea0c Mon Sep 17 00:00:00 2001
From: martinfalisse
Date: Mon, 16 Jan 2023 17:12:53 +0100
Subject: [PATCH] AK: Add `split()` for `String`

---
Reviewer notes (free-form text in this position is not part of the commit message):
- This copy of the patch had been collapsed onto two lines and had lost every
  angle-bracketed span. Line structure, indentation and the template arguments
  of the added code (`ErrorOr<Vector<String>>`, `Vector<String>`) are restored.
- `ErrorOr<String>` on the context lines (`vformatted`, `replace`, `reverse`) and
  the added `#include <AK/Vector.h>` are inferred from usage; confirm against the
  tree before applying.
- The three context `#include` targets in AK/String.h and the author's e-mail
  address could not be recovered and are left as found. The blob hashes on the
  `index` lines predate the review changes below.
- Review changes: `split_limit()` builds its code point view once, the `limit`
  contract is documented, and `split_limit()` gained a test.

 AK/String.cpp           | 35 +++++++++++++++++++++++++++++++++++
 AK/String.h             |  6 ++++++
 Tests/AK/TestString.cpp | 27 +++++++++++++++++++++++++++
 3 files changed, 68 insertions(+)

diff --git a/AK/String.cpp b/AK/String.cpp
index f480f73d7d2..1924aef1130 100644
--- a/AK/String.cpp
+++ b/AK/String.cpp
@@ -251,6 +251,41 @@ ErrorOr<String> String::vformatted(StringView fmtstr, TypeErasedFormatParams& pa
     return builder.to_string();
 }
 
+ErrorOr<Vector<String>> String::split(u32 separator, SplitBehavior split_behavior) const
+{
+    return split_limit(separator, 0, split_behavior);
+}
+
+ErrorOr<Vector<String>> String::split_limit(u32 separator, size_t limit, SplitBehavior split_behavior) const
+{
+    Vector<String> result;
+
+    if (is_empty())
+        return result;
+
+    bool keep_empty = has_flag(split_behavior, SplitBehavior::KeepEmpty);
+
+    // Build the code point view once instead of once per use inside the loop.
+    auto view = code_points();
+
+    // `result.size() + 1` is never 0 in practice, so a limit of 0 means "no limit".
+    size_t substring_start = 0;
+    for (auto it = view.begin(); it != view.end() && (result.size() + 1) != limit; ++it) {
+        u32 code_point = *it;
+        if (code_point == separator) {
+            size_t substring_length = view.iterator_offset(it) - substring_start;
+            if (substring_length != 0 || keep_empty)
+                TRY(result.try_append(TRY(substring_from_byte_offset_with_shared_superstring(substring_start, substring_length))));
+            substring_start = view.iterator_offset(it) + it.underlying_code_point_length_in_bytes();
+        }
+    }
+    // Whatever follows the last consumed separator becomes the final part.
+    size_t tail_length = view.byte_length() - substring_start;
+    if (tail_length != 0 || keep_empty)
+        TRY(result.try_append(TRY(substring_from_byte_offset_with_shared_superstring(substring_start, tail_length))));
+    return result;
+}
+
 bool String::operator==(String const& other) const
 {
     if (is_short_string())
diff --git a/AK/String.h b/AK/String.h
index 75f643bfd62..c5a22111a04 100644
--- a/AK/String.h
+++ b/AK/String.h
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include <AK/Vector.h>
 
 namespace AK {
 
@@ -101,6 +102,11 @@ public:
     ErrorOr<String> replace(StringView needle, StringView replacement, ReplaceMode replace_mode) const;
     ErrorOr<String> reverse() const;
 
+    // Splits on the code point `separator`. Empty parts are dropped unless SplitBehavior::KeepEmpty is set;
+    // an empty string always yields an empty vector. A `limit` of 0 means no limit, otherwise at most
+    // `limit` parts are returned and the last one holds the unsplit remainder.
+    [[nodiscard]] ErrorOr<Vector<String>> split_limit(u32 separator, size_t limit, SplitBehavior = SplitBehavior::Nothing) const;
+    [[nodiscard]] ErrorOr<Vector<String>> split(u32 separator, SplitBehavior = SplitBehavior::Nothing) const;
+
     [[nodiscard]] bool operator==(String const&) const;
     [[nodiscard]] bool operator!=(String const& other) const { return !(*this == other); }
 
diff --git a/Tests/AK/TestString.cpp b/Tests/AK/TestString.cpp
index c089cfcf723..c5785c7dabf 100644
--- a/Tests/AK/TestString.cpp
+++ b/Tests/AK/TestString.cpp
@@ -272,3 +272,30 @@ TEST_CASE(is_one_of)
     EXPECT(bar.is_one_of("bar"sv, "foo"sv));
     EXPECT(bar.is_one_of("bar"sv));
 }
+
+TEST_CASE(split)
+{
+    {
+        auto test = MUST(String::from_utf8("foo bar baz"sv));
+        auto parts = MUST(test.split(' '));
+        EXPECT_EQ(parts.size(), 3u);
+        EXPECT_EQ(parts[0], "foo"sv);
+        EXPECT_EQ(parts[1], "bar"sv);
+        EXPECT_EQ(parts[2], "baz"sv);
+    }
+    {
+        auto test = MUST(String::from_utf8("ωΣ2ωΣω"sv));
+        auto parts = MUST(test.split(0x03A3u));
+        EXPECT_EQ(parts.size(), 3u);
+        EXPECT_EQ(parts[0], "ω"sv);
+        EXPECT_EQ(parts[1], "2ω"sv);
+        EXPECT_EQ(parts[2], "ω"sv);
+    }
+    {
+        auto test = MUST(String::from_utf8("foo bar baz"sv));
+        auto parts = MUST(test.split_limit(' ', 2));
+        EXPECT_EQ(parts.size(), 2u);
+        EXPECT_EQ(parts[0], "foo"sv);
+        EXPECT_EQ(parts[1], "bar baz"sv);
+    }
+}