TestTextDecoders.cpp 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. /*
  2. * Copyright (c) 2022, the SerenityOS developers.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/String.h>
  7. #include <AK/Vector.h>
  8. #include <LibTest/TestCase.h>
  9. #include <LibTextCodec/Decoder.h>
  10. TEST_CASE(test_utf8_decode)
  11. {
  12. auto decoder = TextCodec::UTF8Decoder();
  13. // Bytes for U+1F600 GRINNING FACE
  14. auto test_string = "\xf0\x9f\x98\x80"sv;
  15. EXPECT(decoder.validate(test_string));
  16. Vector<u32> processed_code_points;
  17. MUST(decoder.process(test_string, [&](u32 code_point) {
  18. return processed_code_points.try_append(code_point);
  19. }));
  20. EXPECT(processed_code_points.size() == 1);
  21. EXPECT(processed_code_points[0] == 0x1F600);
  22. EXPECT(MUST(decoder.to_utf8(test_string)) == test_string);
  23. }
  24. TEST_CASE(test_utf16be_decode)
  25. {
  26. auto decoder = TextCodec::UTF16BEDecoder();
  27. // This is the output of `python3 -c "print('säk😀'.encode('utf-16be'))"`.
  28. auto test_string = "\x00s\x00\xe4\x00k\xd8=\xde\x00"sv;
  29. EXPECT(decoder.validate(test_string));
  30. Vector<u32> processed_code_points;
  31. MUST(decoder.process(test_string, [&](u32 code_point) {
  32. return processed_code_points.try_append(code_point);
  33. }));
  34. EXPECT(processed_code_points.size() == 4);
  35. EXPECT(processed_code_points[0] == 0x73);
  36. EXPECT(processed_code_points[1] == 0xE4);
  37. EXPECT(processed_code_points[2] == 0x6B);
  38. EXPECT(processed_code_points[3] == 0x1F600);
  39. auto utf8 = MUST(decoder.to_utf8(test_string));
  40. EXPECT_EQ(utf8, "säk😀"sv);
  41. }
  42. TEST_CASE(test_utf16le_decode)
  43. {
  44. auto decoder = TextCodec::UTF16LEDecoder();
  45. // This is the output of `python3 -c "print('säk😀'.encode('utf-16le'))"`.
  46. auto test_string = "s\x00\xe4\x00k\x00=\xd8\x00\xde"sv;
  47. EXPECT(decoder.validate(test_string));
  48. Vector<u32> processed_code_points;
  49. MUST(decoder.process(test_string, [&](u32 code_point) {
  50. return processed_code_points.try_append(code_point);
  51. }));
  52. EXPECT(processed_code_points.size() == 4);
  53. EXPECT(processed_code_points[0] == 0x73);
  54. EXPECT(processed_code_points[1] == 0xE4);
  55. EXPECT(processed_code_points[2] == 0x6B);
  56. EXPECT(processed_code_points[3] == 0x1F600);
  57. auto utf8 = MUST(decoder.to_utf8(test_string));
  58. EXPECT_EQ(utf8, "säk😀"sv);
  59. }