소스 검색

AK: Add String::from_utf8_with_replacement_character

This takes a byte sequence and converts it to a UTF-8 String, substituting
the replacement character (U+FFFD) for any bytes that are not valid UTF-8.
Shannon Booth 1 년 전
부모
커밋
033ea0e7fb
3개의 변경된 파일, 26개의 추가 그리고 0개의 삭제
  1. 10 0
      AK/String.cpp
  2. 3 0
      AK/String.h
  3. 13 0
      Tests/AK/TestString.cpp

+ 10 - 0
AK/String.cpp

@@ -20,6 +20,16 @@
 
 namespace AK {
 
+String String::from_utf8_with_replacement_character(StringView view)
+{
+    // Walk the input one code point at a time and re-encode each into a fresh
+    // builder. This function never inspects the raw bytes itself; the
+    // substitution of U+FFFD for undecodable input is presumably performed by
+    // Utf8View's iteration.
+    Utf8View code_points { view };
+    StringBuilder output;
+
+    for (auto code_point : code_points) {
+        output.append_code_point(code_point);
+    }
+
+    // The builder's contents are turned into a String without a further
+    // validation pass.
+    return output.to_string_without_validation();
+}
+
 String String::from_utf8_without_validation(ReadonlyBytes bytes)
 {
     String result;

+ 3 - 0
AK/String.h

@@ -51,6 +51,9 @@ public:
     // Creates a new String from a sequence of UTF-8 encoded code points.
     static ErrorOr<String> from_utf8(StringView);
 
+    // Creates a new String from the given bytes, using U+FFFD REPLACEMENT CHARACTER in place of
+    // input that is not valid UTF-8. Returns a String directly rather than an ErrorOr<String>.
+    [[nodiscard]] static String from_utf8_with_replacement_character(StringView);
+
     template<typename T>
     requires(IsOneOf<RemoveCVReference<T>, ByteString, DeprecatedFlyString, FlyString, String>)
     static ErrorOr<String> from_utf8(T&&) = delete;

+ 13 - 0
Tests/AK/TestString.cpp

@@ -173,6 +173,19 @@ TEST_CASE(invalid_utf8)
     EXPECT(string3.error().string_literal().contains("Input was not valid UTF-8"sv));
 }
 
+TEST_CASE(with_replacement_character)
+{
+    // Malformed input: an ASCII prefix followed by F4 8F BF C0. F4 8F BF is a truncated
+    // four-byte sequence and C0 is not a continuation byte, so none of the four trailing
+    // bytes decode. The expectation is one U+FFFD (EF BF BD) per offending byte.
+    auto replaced = String::from_utf8_with_replacement_character("long string \xf4\x8f\xbf\xc0"sv);
+    Array<u8, 24> replaced_expected {
+        0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x20, // "long string "
+        0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, // U+FFFD x 4
+    };
+    EXPECT_EQ(replaced.bytes(), replaced_expected);
+
+    // Well-formed input must come back unchanged.
+    auto untouched = String::from_utf8_with_replacement_character("A valid string!"sv);
+    EXPECT_EQ(untouched, "A valid string!"sv);
+
+    // Empty input yields an empty String.
+    auto empty = String::from_utf8_with_replacement_character(""sv);
+    EXPECT_EQ(empty, ""sv);
+}
+
 TEST_CASE(from_code_points)
 {
     for (u32 code_point = 0; code_point < 0x80; ++code_point) {