|
@@ -6,6 +6,7 @@
|
|
|
|
|
|
#include <LibTest/TestCase.h>
|
|
|
|
|
|
+#include <AK/ByteBuffer.h>
|
|
|
#include <AK/Utf8View.h>
|
|
|
|
|
|
TEST_CASE(decode_ascii)
|
|
@@ -33,12 +34,15 @@ TEST_CASE(decode_utf8)
|
|
|
EXPECT(valid_bytes == (size_t)utf8.byte_length());
|
|
|
|
|
|
u32 expected[] = { 1055, 1088, 1080, 1074, 1077, 1090, 44, 32, 1084, 1080, 1088, 33, 32, 128512, 32, 947, 949, 953, 940, 32, 963, 959, 965, 32, 954, 972, 963, 956, 959, 962, 32, 12371, 12435, 12395, 12385, 12399, 19990, 30028 };
|
|
|
+ String expected_underlying_bytes[] = { "П", "р", "и", "в", "е", "т", ",", " ", "м", "и", "р", "!", " ", "😀", " ", "γ", "ε", "ι", "ά", " ", "σ", "ο", "υ", " ", "κ", "ό", "σ", "μ", "ο", "ς", " ", "こ", "ん", "に", "ち", "は", "世", "界" };
|
|
|
size_t expected_size = sizeof(expected) / sizeof(expected[0]);
|
|
|
|
|
|
size_t i = 0;
|
|
|
- for (u32 code_point : utf8) {
|
|
|
+ for (auto it = utf8.begin(); it != utf8.end(); ++it) {
|
|
|
+ u32 code_point = *it;
|
|
|
VERIFY(i < expected_size);
|
|
|
EXPECT_EQ(code_point, expected[i]);
|
|
|
+ EXPECT_EQ(it.underlying_code_point_bytes(), expected_underlying_bytes[i].bytes());
|
|
|
i++;
|
|
|
}
|
|
|
EXPECT_EQ(i, expected_size);
|
|
@@ -103,3 +107,78 @@ TEST_CASE(iterate_utf8)
|
|
|
return Test::Crash::Failure::DidNotCrash;
|
|
|
});
|
|
|
}
|
|
|
+
|
|
|
+TEST_CASE(decode_invalid_ut8) // Iterates Utf8View over malformed input; each sub-case expects U+FFFD (0xFFFD) for the bad byte and that byte alone from underlying_code_point_bytes(). NOTE(review): "ut8" looks like a typo for "utf8" - confirm before renaming.
|
|
|
+{
|
|
|
+    // Test case 1 : Getting an extension (continuation) byte, 0xA0, as the first byte of a code point. Expected: 'a', 'b', U+FFFD backed by the single byte "\xA0", then 'd'.
|
|
|
+    {
|
|
|
+        char raw_data[] = { 'a', 'b', (char)0xA0, 'd', 0 };
|
|
|
+        Utf8View view { raw_data };
|
|
|
+        u32 expected_characters[] = { 'a', 'b', 0xFFFD, 'd' };
|
|
|
+        String expected_underlying_bytes[] = { "a", "b", "\xA0", "d" };
|
|
|
+        size_t expected_size = sizeof(expected_characters) / sizeof(expected_characters[0]);
|
|
|
+        size_t i = 0;
|
|
|
+        for (auto it = view.begin(); it != view.end(); ++it) {
|
|
|
+            u32 code_point = *it;
|
|
|
+            VERIFY(i < expected_size); // bounds guard: the expected arrays are indexed with i just below
|
|
|
+            EXPECT_EQ(code_point, expected_characters[i]);
|
|
|
+            EXPECT_EQ(it.underlying_code_point_bytes(), expected_underlying_bytes[i].bytes());
|
|
|
+            i++;
|
|
|
+        }
|
|
|
+        VERIFY(i == expected_size); // the loop must have produced exactly expected_size code points
|
|
|
+    }
|
|
|
+
|
|
|
+    // Test case 2 : Getting a non-extension byte when an extension byte is expected (0xC0 has the two-byte lead pattern but is followed by 'd'). Expected: U+FFFD backed only by "\xC0"; 'd' and 'e' are still yielded as themselves.
|
|
|
+    {
|
|
|
+        char raw_data[] = { 'a', 'b', (char)0xC0, 'd', 'e', 0 };
|
|
|
+        Utf8View view { raw_data };
|
|
|
+        u32 expected_characters[] = { 'a', 'b', 0xFFFD, 'd', 'e' };
|
|
|
+        String expected_underlying_bytes[] = { "a", "b", "\xC0", "d", "e" };
|
|
|
+        size_t expected_size = sizeof(expected_characters) / sizeof(expected_characters[0]);
|
|
|
+        size_t i = 0;
|
|
|
+        for (auto it = view.begin(); it != view.end(); ++it) {
|
|
|
+            u32 code_point = *it;
|
|
|
+            VERIFY(i < expected_size); // bounds guard: the expected arrays are indexed with i just below
|
|
|
+            EXPECT_EQ(code_point, expected_characters[i]);
|
|
|
+            EXPECT_EQ(it.underlying_code_point_bytes(), expected_underlying_bytes[i].bytes());
|
|
|
+            i++;
|
|
|
+        }
|
|
|
+        VERIFY(i == expected_size); // the loop must have produced exactly expected_size code points
|
|
|
+    }
|
|
|
+
|
|
|
+    // Test case 3 : Not enough bytes before the end of the string. NOTE(review): 0x90 has the 0b10xxxxxx continuation-byte pattern, so this input looks like the situation of test case 1 rather than a truncated multi-byte sequence - confirm the intended lead byte.
|
|
|
+    {
|
|
|
+        char raw_data[] = { 'a', 'b', (char)0x90, 'd', 0 };
|
|
|
+        Utf8View view { raw_data };
|
|
|
+        u32 expected_characters[] = { 'a', 'b', 0xFFFD, 'd' };
|
|
|
+        String expected_underlying_bytes[] = { "a", "b", "\x90", "d" };
|
|
|
+        size_t expected_size = sizeof(expected_characters) / sizeof(expected_characters[0]);
|
|
|
+        size_t i = 0;
|
|
|
+        for (auto it = view.begin(); it != view.end(); ++it) {
|
|
|
+            u32 code_point = *it;
|
|
|
+            VERIFY(i < expected_size); // bounds guard: the expected arrays are indexed with i just below
|
|
|
+            EXPECT_EQ(code_point, expected_characters[i]);
|
|
|
+            EXPECT_EQ(it.underlying_code_point_bytes(), expected_underlying_bytes[i].bytes());
|
|
|
+            i++;
|
|
|
+        }
|
|
|
+        VERIFY(i == expected_size); // the loop must have produced exactly expected_size code points
|
|
|
+    }
|
|
|
+
|
|
|
+    // Test case 4 : Not enough bytes at the end of the string (the invalid byte is the last one before the terminating 0). NOTE(review): as in test case 3, 0x90 is a continuation-pattern byte, not a lead byte - confirm this covers the intended truncation path.
|
|
|
+    {
|
|
|
+        char raw_data[] = { 'a', 'b', 'c', (char)0x90, 0 };
|
|
|
+        Utf8View view { raw_data };
|
|
|
+        u32 expected_characters[] = { 'a', 'b', 'c', 0xFFFD };
|
|
|
+        String expected_underlying_bytes[] = { "a", "b", "c", "\x90" };
|
|
|
+        size_t expected_size = sizeof(expected_characters) / sizeof(expected_characters[0]);
|
|
|
+        size_t i = 0;
|
|
|
+        for (auto it = view.begin(); it != view.end(); ++it) {
|
|
|
+            u32 code_point = *it;
|
|
|
+            VERIFY(i < expected_size); // bounds guard: the expected arrays are indexed with i just below
|
|
|
+            EXPECT_EQ(code_point, expected_characters[i]);
|
|
|
+            EXPECT_EQ(it.underlying_code_point_bytes(), expected_underlying_bytes[i].bytes());
|
|
|
+            i++;
|
|
|
+        }
|
|
|
+        VERIFY(i == expected_size); // the loop must have produced exactly expected_size code points
|
|
|
+    }
|
|
|
+}
|