From 033ea0e7fb0f72338ae95aa0413da838206440bb Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Sat, 10 Aug 2024 17:16:01 +1200 Subject: [PATCH] AK: Add String::from_utf8_with_replacement_character This takes a byte sequence and converts it to a UTF-8 string, substituting the replacement character (U+FFFD) for any invalid bytes. --- AK/String.cpp | 10 ++++++++++ AK/String.h | 3 +++ Tests/AK/TestString.cpp | 13 +++++++++++++ 3 files changed, 26 insertions(+) diff --git a/AK/String.cpp b/AK/String.cpp index dd6502740f6..fd4c50c33e5 100644 --- a/AK/String.cpp +++ b/AK/String.cpp @@ -20,6 +20,16 @@ namespace AK { +String String::from_utf8_with_replacement_character(StringView view) +{ + StringBuilder builder; + + for (auto c : Utf8View { view }) + builder.append_code_point(c); + + return builder.to_string_without_validation(); +} + String String::from_utf8_without_validation(ReadonlyBytes bytes) { String result; diff --git a/AK/String.h b/AK/String.h index 130087eaae8..14f8e2101c3 100644 --- a/AK/String.h +++ b/AK/String.h @@ -51,6 +51,9 @@ public: // Creates a new String from a sequence of UTF-8 encoded code points. 
static ErrorOr from_utf8(StringView); + // Creates a new String from the given view, using the replacement character (U+FFFD) in place of invalid bytes + [[nodiscard]] static String from_utf8_with_replacement_character(StringView); + template requires(IsOneOf, ByteString, DeprecatedFlyString, FlyString, String>) static ErrorOr from_utf8(T&&) = delete; diff --git a/Tests/AK/TestString.cpp b/Tests/AK/TestString.cpp index f803e799529..ddb012f4efc 100644 --- a/Tests/AK/TestString.cpp +++ b/Tests/AK/TestString.cpp @@ -173,6 +173,19 @@ TEST_CASE(invalid_utf8) EXPECT(string3.error().string_literal().contains("Input was not valid UTF-8"sv)); } +TEST_CASE(with_replacement_character) +{ + auto string1 = String::from_utf8_with_replacement_character("long string \xf4\x8f\xbf\xc0"sv); // U+110000 + Array string1_expected { 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x20, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd }; + EXPECT_EQ(string1.bytes(), string1_expected); + + auto string3 = String::from_utf8_with_replacement_character("A valid string!"sv); + EXPECT_EQ(string3, "A valid string!"sv); + + auto string4 = String::from_utf8_with_replacement_character(""sv); + EXPECT_EQ(string4, ""sv); +} + TEST_CASE(from_code_points) { for (u32 code_point = 0; code_point < 0x80; ++code_point) {