mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-26 09:30:24 +00:00
LibTextCodec: Add Turkish (aka ISO-8859-9, Windows-1254) encoding
This commit is contained in:
parent
21ee0ad6fc
commit
3b2a528b33
Notes:
sideshowbarker
2024-07-18 11:37:28 +09:00
Author: https://github.com/aatosmajava Commit: https://github.com/SerenityOS/serenity/commit/3b2a528b33e Pull-request: https://github.com/SerenityOS/serenity/pull/8218 Reviewed-by: https://github.com/IdanHo ✅
2 changed files with 44 additions and 0 deletions
|
@ -67,6 +67,14 @@ Latin9Decoder& latin9_decoder()
|
|||
return *decoder;
|
||||
}
|
||||
|
||||
// Returns the shared TurkishDecoder instance, creating it on first use.
// The instance is heap-allocated once and intentionally never destroyed.
TurkishDecoder& turkish_decoder()
{
    static TurkishDecoder* s_instance = nullptr;
    if (s_instance != nullptr)
        return *s_instance;

    s_instance = new TurkishDecoder;
    return *s_instance;
}
|
||||
|
||||
}
|
||||
|
||||
Decoder* decoder_for(const String& a_encoding)
|
||||
|
@ -87,6 +95,8 @@ Decoder* decoder_for(const String& a_encoding)
|
|||
return &cyrillic_decoder();
|
||||
if (encoding.value().equals_ignoring_case("iso-8859-15"))
|
||||
return &latin9_decoder();
|
||||
if (encoding.value().equals_ignoring_case("windows-1254"))
|
||||
return &turkish_decoder();
|
||||
}
|
||||
dbgln("TextCodec: No decoder implemented for encoding '{}'", a_encoding);
|
||||
return nullptr;
|
||||
|
@ -383,4 +393,33 @@ String Latin9Decoder::to_utf8(const StringView& input)
|
|||
return builder.to_string();
|
||||
}
|
||||
|
||||
// Decodes a Windows-1254 (Turkish) byte string into UTF-8.
//
// Every input byte maps to exactly one Unicode code point:
//  - 0x80..0x9F use the windows-1254 index (euro sign, typographic quotes, dashes, ...);
//    bytes that are unassigned there fall back to the same-valued C1 control code point.
//  - Six bytes in 0xD0..0xFE are the Turkish letters that replace Latin-1's Icelandic ones.
//  - All remaining bytes are identical to the same-valued Unicode code point.
//
// The Encoding Standard treats the "iso-8859-9" labels as aliases of windows-1254,
// so the windows-1254 table is the one used for both names.
String TurkishDecoder::to_utf8(const StringView& input)
{
    // Code points for bytes 0x80..0x9F, indexed by (byte - 0x80).
    static constexpr u32 high_control_range[32] = {
        0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 0x80..0x87
        0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x008E, 0x008F, // 0x88..0x8F
        0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 0x90..0x97
        0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x009E, 0x0178, // 0x98..0x9F
    };

    auto convert_turkish_to_utf8 = [](u8 ch) -> u32 {
        // In windows-1254 this range holds printable characters, not C1 controls.
        if (ch >= 0x80 && ch <= 0x9F)
            return high_control_range[ch - 0x80];

        // The six letters where Turkish differs from Latin-1 in the upper half.
        switch (ch) {
        case 0xD0:
            return 0x11E; // LATIN CAPITAL LETTER G WITH BREVE
        case 0xDD:
            return 0x130; // LATIN CAPITAL LETTER I WITH DOT ABOVE
        case 0xDE:
            return 0x15E; // LATIN CAPITAL LETTER S WITH CEDILLA
        case 0xF0:
            return 0x11F; // LATIN SMALL LETTER G WITH BREVE
        case 0xFD:
            return 0x131; // LATIN SMALL LETTER DOTLESS I
        case 0xFE:
            return 0x15F; // LATIN SMALL LETTER S WITH CEDILLA
        default:
            return ch;
        }
    };

    // Reserve at least one byte per input byte; non-ASCII output grows the buffer as needed.
    StringBuilder builder(input.length());
    for (auto ch : input) {
        builder.append_code_point(convert_turkish_to_utf8(ch));
    }
    return builder.to_string();
}
|
||||
|
||||
}
|
||||
|
|
|
@ -53,6 +53,11 @@ public:
|
|||
virtual String to_utf8(const StringView&) override;
|
||||
};
|
||||
|
||||
class TurkishDecoder final : public Decoder {
|
||||
public:
|
||||
virtual String to_utf8(const StringView&) override;
|
||||
};
|
||||
|
||||
// Returns a pointer to the decoder matching the given encoding name, or nullptr
// (after logging a debug message) when no decoder is implemented for it.
Decoder* decoder_for(const String& encoding);
|
||||
// NOTE(review): presumably maps an encoding label to its standardized name and
// yields an empty Optional for unknown labels — confirm against the definition.
Optional<String> get_standardized_encoding(const String& encoding);
|
||||
// NOTE(review): presumably reports whether `encoding` is already a standardized
// encoding name — confirm against the definition.
bool is_standardized_encoding(const String& encoding);
|
||||
|
|
Loading…
Reference in a new issue