mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-04 05:20:30 +00:00
LibTextCodec: Add alternate Cyrillic (aka Koi8-r) encoding
Fixes #6840.
This commit is contained in:
parent
2c1a6ce9a5
commit
6f5102f435
Notes:
sideshowbarker
2024-07-17 22:42:20 +09:00
Author: https://github.com/can3p 🔰 Commit: https://github.com/SerenityOS/serenity/commit/6f5102f435b Pull-request: https://github.com/SerenityOS/serenity/pull/11250
2 changed files with 41 additions and 0 deletions
|
@ -59,6 +59,14 @@ CyrillicDecoder& cyrillic_decoder()
|
|||
return *decoder;
|
||||
}
|
||||
|
||||
// Returns the shared Koi8RDecoder instance, allocating it on the first call.
// This function never deletes the instance, so the returned reference is not
// invalidated by later calls.
Koi8RDecoder& koi8r_decoder()
{
    static Koi8RDecoder* s_instance = nullptr;
    if (s_instance == nullptr)
        s_instance = new Koi8RDecoder;
    return *s_instance;
}
|
||||
|
||||
Latin9Decoder& latin9_decoder()
|
||||
{
|
||||
static Latin9Decoder* decoder = nullptr;
|
||||
|
@ -93,6 +101,8 @@ Decoder* decoder_for(const String& a_encoding)
|
|||
return &hebrew_decoder();
|
||||
if (encoding.value().equals_ignoring_case("windows-1251"))
|
||||
return &cyrillic_decoder();
|
||||
if (encoding.value().equals_ignoring_case("koi8-r"))
|
||||
return &koi8r_decoder();
|
||||
if (encoding.value().equals_ignoring_case("iso-8859-15"))
|
||||
return &latin9_decoder();
|
||||
if (encoding.value().equals_ignoring_case("windows-1254"))
|
||||
|
@ -165,6 +175,8 @@ Optional<String> get_standardized_encoding(const String& encoding)
|
|||
return "windows-1258";
|
||||
if (trimmed_lowercase_encoding.is_one_of("x-mac-cyrillic", "x-mac-ukrainian"))
|
||||
return "x-mac-cyrillic";
|
||||
if (trimmed_lowercase_encoding.is_one_of("koi8-r", "koi8r"))
|
||||
return "koi8-r";
|
||||
if (trimmed_lowercase_encoding.is_one_of("chinese", "csgb2312", "csiso58gb231280", "gb2312", "gb_2312", "gb_2312-80", "gbk", "iso-ir-58", "x-gbk"))
|
||||
return "GBK";
|
||||
if (trimmed_lowercase_encoding == "gb18030")
|
||||
|
@ -376,6 +388,30 @@ void CyrillicDecoder::process(StringView input, Function<void(u32)> on_code_poin
|
|||
}
|
||||
}
|
||||
|
||||
// Decodes KOI8-R encoded bytes and reports each decoded character.
//
// input:         raw bytes; each byte encodes exactly one character.
// on_code_point: called once per input byte, in order, with the Unicode code
//                point that byte maps to.
//
// Bytes below 0x80 are passed through unchanged; bytes 0x80..0xFF are looked up
// in the translation table below.
void Koi8RDecoder::process(StringView input, Function<void(u32)> on_code_point)
{
    // Entry i holds the Unicode code point for byte (0x80 + i). Every entry must
    // be a code point, not a UTF-8 byte sequence: 0xA3 is U+0451 and 0xB3 is
    // U+0401 (previously stored as the UTF-8 bytes 0xd191 / 0xd081), and 0xE2 is
    // U+0411 (previously a duplicate of U+0441).
    // clang-format off
    static constexpr Array<u32, 128> translation_table = {
        /* 0x80 */ 0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,0x252c,0x2534,0x253c,0x2580,0x2584,0x2588,0x258c,0x2590,
        /* 0x90 */ 0x2591,0x2592,0x2593,0x2320,0x25a0,0x2219,0x221a,0x2248,0x2264,0x2265,0xA0,0x2321,0xb0,0xb2,0xb7,0xf7,
        /* 0xA0 */ 0x2550,0x2551,0x2552,0x451,0x2553,0x2554,0x2555,0x2556,0x2557,0x2558,0x2559,0x255a,0x255b,0x255c,0x255d,0x255e,
        /* 0xB0 */ 0x255f,0x2560,0x2561,0x401,0x2562,0x2563,0x2564,0x2565,0x2566,0x2567,0x2568,0x2569,0x256a,0x256b,0x256c,0xa9,
        /* 0xC0 */ 0x44e,0x430,0x431,0x446,0x434,0x435,0x444,0x433,0x445,0x438,0x439,0x43a,0x43b,0x43c,0x43d,0x43e,
        /* 0xD0 */ 0x43f,0x44f,0x440,0x441,0x442,0x443,0x436,0x432,0x44c,0x44b,0x437,0x448,0x44d,0x449,0x447,0x44a,
        /* 0xE0 */ 0x42e,0x410,0x411,0x426,0x414,0x415,0x424,0x413,0x425,0x418,0x419,0x41a,0x41b,0x41c,0x41d,0x41e,
        /* 0xF0 */ 0x41f,0x42f,0x420,0x421,0x422,0x423,0x416,0x412,0x42c,0x42b,0x417,0x428,0x42d,0x429,0x427,0x42a,
    };
    // clang-format on

    // Iterate as unsigned char so bytes >= 0x80 compare and index as 128..255.
    for (unsigned char ch : input) {
        if (ch < 0x80) { // Superset of ASCII
            on_code_point(ch);
        } else {
            on_code_point(translation_table[ch - 0x80]);
        }
    }
}
|
||||
|
||||
void Latin9Decoder::process(StringView input, Function<void(u32)> on_code_point)
|
||||
{
|
||||
auto convert_latin9_to_utf8 = [](u8 ch) -> u32 {
|
||||
|
|
|
@ -52,6 +52,11 @@ public:
|
|||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
||||
};
|
||||
|
||||
class Koi8RDecoder final : public Decoder {
|
||||
public:
|
||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
||||
};
|
||||
|
||||
class Latin9Decoder final : public Decoder {
|
||||
public:
|
||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
||||
|
|
Loading…
Reference in a new issue