TestTextEncoders.cpp 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. /*
  2. * Copyright (c) 2024, Ben Jilks <benjyjilks@gmail.com>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <LibTest/TestCase.h>
  7. #include <LibTextCodec/Encoder.h>
  8. TEST_CASE(test_utf8_encode)
  9. {
  10. TextCodec::UTF8Encoder encoder;
  11. // Unicode character U+1F600 GRINNING FACE
  12. auto test_string = "\U0001F600"sv;
  13. Vector<u8> processed_bytes;
  14. MUST(encoder.process(Utf8View(test_string), [&](u8 byte) {
  15. return processed_bytes.try_append(byte);
  16. }));
  17. EXPECT(processed_bytes.size() == 4);
  18. EXPECT(processed_bytes[0] == 0xF0);
  19. EXPECT(processed_bytes[1] == 0x9F);
  20. EXPECT(processed_bytes[2] == 0x98);
  21. EXPECT(processed_bytes[3] == 0x80);
  22. }
  23. TEST_CASE(test_euc_jp_encoder)
  24. {
  25. TextCodec::EUCJPEncoder encoder;
  26. // U+A5 Yen Sign
  27. // U+3088 Hiragana Letter Yo
  28. // U+30C4 Katakana Letter Tu
  29. auto test_string = "\U000000A5\U00003088\U000030C4"sv;
  30. Vector<u8> processed_bytes;
  31. MUST(encoder.process(Utf8View(test_string), [&](u8 byte) {
  32. return processed_bytes.try_append(byte);
  33. }));
  34. EXPECT(processed_bytes.size() == 5);
  35. EXPECT(processed_bytes[0] == 0x5C);
  36. EXPECT(processed_bytes[1] == 0xA4);
  37. EXPECT(processed_bytes[2] == 0xE8);
  38. EXPECT(processed_bytes[3] == 0xA5);
  39. EXPECT(processed_bytes[4] == 0xC4);
  40. }
  41. TEST_CASE(test_euc_kr_encoder)
  42. {
  43. TextCodec::EUCKREncoder encoder;
  44. // U+B29F Hangul Syllable Neulh
  45. // U+7C97 CJK Unified Ideograph-7C97
  46. auto test_string = "\U0000B29F\U00007C97"sv;
  47. Vector<u8> processed_bytes;
  48. MUST(encoder.process(Utf8View(test_string), [&](u8 byte) {
  49. return processed_bytes.try_append(byte);
  50. }));
  51. EXPECT(processed_bytes.size() == 4);
  52. EXPECT(processed_bytes[0] == 0x88);
  53. EXPECT(processed_bytes[1] == 0x6B);
  54. EXPECT(processed_bytes[2] == 0xF0);
  55. EXPECT(processed_bytes[3] == 0xD8);
  56. }
  57. TEST_CASE(test_big5_encoder)
  58. {
  59. TextCodec::Big5Encoder encoder;
  60. // U+A7 Section Sign
  61. // U+70D7 CJK Unified Ideograph-70D7
  62. auto test_string = "\U000000A7\U000070D7"sv;
  63. Vector<u8> processed_bytes;
  64. MUST(encoder.process(Utf8View(test_string), [&](u8 byte) {
  65. return processed_bytes.try_append(byte);
  66. }));
  67. EXPECT(processed_bytes.size() == 4);
  68. EXPECT(processed_bytes[0] == 0xA1);
  69. EXPECT(processed_bytes[1] == 0xB1);
  70. EXPECT(processed_bytes[2] == 0xD2);
  71. EXPECT(processed_bytes[3] == 0x71);
  72. }
  73. TEST_CASE(test_gb18030_encoder)
  74. {
  75. TextCodec::GB18030Encoder encoder;
  76. // U+20AC Euro Sign
  77. // U+E4C5 Private Use Area
  78. auto test_string = "\U000020AC\U0000E4C5"sv;
  79. Vector<u8> processed_bytes;
  80. MUST(encoder.process(Utf8View(test_string), [&](u8 byte) {
  81. return processed_bytes.try_append(byte);
  82. }));
  83. EXPECT(processed_bytes.size() == 4);
  84. EXPECT(processed_bytes[0] == 0xA2);
  85. EXPECT(processed_bytes[1] == 0xE3);
  86. EXPECT(processed_bytes[2] == 0xFE);
  87. EXPECT(processed_bytes[3] == 0xFE);
  88. }