mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
AK: Add String::from_utf8_with_replacement_character
This takes a byte sequence and converts it to a UTF-8 string, substituting the replacement character (U+FFFD) for any invalid bytes.
This commit is contained in:
parent
84a09476ba
commit
033ea0e7fb
Notes:
github-actions[bot]
2024-08-10 08:47:28 +00:00
Author: https://github.com/shannonbooth Commit: https://github.com/LadybirdBrowser/ladybird/commit/033ea0e7fb0 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1023 Reviewed-by: https://github.com/awesomekling
3 changed files with 26 additions and 0 deletions
|
@ -20,6 +20,16 @@
|
||||||
|
|
||||||
namespace AK {
|
namespace AK {
|
||||||
|
|
||||||
|
String String::from_utf8_with_replacement_character(StringView view)
|
||||||
|
{
|
||||||
|
StringBuilder builder;
|
||||||
|
|
||||||
|
for (auto c : Utf8View { view })
|
||||||
|
builder.append_code_point(c);
|
||||||
|
|
||||||
|
return builder.to_string_without_validation();
|
||||||
|
}
|
||||||
|
|
||||||
String String::from_utf8_without_validation(ReadonlyBytes bytes)
|
String String::from_utf8_without_validation(ReadonlyBytes bytes)
|
||||||
{
|
{
|
||||||
String result;
|
String result;
|
||||||
|
|
|
@ -51,6 +51,9 @@ public:
|
||||||
// Creates a new String from a sequence of UTF-8 encoded code points.
|
// Creates a new String from a sequence of UTF-8 encoded code points.
|
||||||
static ErrorOr<String> from_utf8(StringView);
|
static ErrorOr<String> from_utf8(StringView);
|
||||||
|
|
||||||
|
// Creates a new String from UTF-8 input, substituting the replacement character (U+FFFD) for invalid bytes.
|
||||||
|
[[nodiscard]] static String from_utf8_with_replacement_character(StringView);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
requires(IsOneOf<RemoveCVReference<T>, ByteString, DeprecatedFlyString, FlyString, String>)
|
requires(IsOneOf<RemoveCVReference<T>, ByteString, DeprecatedFlyString, FlyString, String>)
|
||||||
static ErrorOr<String> from_utf8(T&&) = delete;
|
static ErrorOr<String> from_utf8(T&&) = delete;
|
||||||
|
|
|
@ -173,6 +173,19 @@ TEST_CASE(invalid_utf8)
|
||||||
EXPECT(string3.error().string_literal().contains("Input was not valid UTF-8"sv));
|
EXPECT(string3.error().string_literal().contains("Input was not valid UTF-8"sv));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE(with_replacement_character)
|
||||||
|
{
|
||||||
|
auto string1 = String::from_utf8_with_replacement_character("long string \xf4\x8f\xbf\xc0"sv); // U+110000
|
||||||
|
Array<u8, 24> string1_expected { 0x6c, 0x6f, 0x6e, 0x67, 0x20, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x20, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd };
|
||||||
|
EXPECT_EQ(string1.bytes(), string1_expected);
|
||||||
|
|
||||||
|
auto string3 = String::from_utf8_with_replacement_character("A valid string!"sv);
|
||||||
|
EXPECT_EQ(string3, "A valid string!"sv);
|
||||||
|
|
||||||
|
auto string4 = String::from_utf8_with_replacement_character(""sv);
|
||||||
|
EXPECT_EQ(string4, ""sv);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_CASE(from_code_points)
|
TEST_CASE(from_code_points)
|
||||||
{
|
{
|
||||||
for (u32 code_point = 0; code_point < 0x80; ++code_point) {
|
for (u32 code_point = 0; code_point < 0x80; ++code_point) {
|
||||||
|
|
Loading…
Reference in a new issue