mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-11 17:00:37 +00:00
LibTextCodec: Ignore BYTE ORDER MARK at the start of utf8/16 strings
Previously, the BOM was included in the output text, which confused the HTML parser. The BOM is not needed once the codec has been identified, so it is now removed when converting to UTF-8.
This commit is contained in:
parent
9ec02e7137
commit
d7ffa51424
Notes:
sideshowbarker
2024-07-18 03:55:29 +09:00
Author: https://github.com/AtkinsSJ Commit: https://github.com/SerenityOS/serenity/commit/d7ffa514249 Pull-request: https://github.com/SerenityOS/serenity/pull/10040
1 changed file with 15 additions and 3 deletions
|
@ -208,7 +208,13 @@ void UTF8Decoder::process(const StringView& input, Function<void(u32)> on_code_p
|
|||
|
||||
// Returns the input as a String, with a leading UTF-8 byte order mark
// (bytes EF BB BF) dropped when one is present.
// NOTE(review): this span is a rendered diff without +/- markers. Per the hunk
// header (7 -> 13 lines), `return input;` below looks like the removed
// pre-change line rather than live code -- confirm against the repository.
String UTF8Decoder::to_utf8(const StringView& input)
|
||||
{
|
||||
return input;
|
||||
// Discard the BOM
|
||||
auto bomless_input = input;
|
||||
// The size check comes first so the three indexed byte reads stay in bounds.
if (auto bytes = input.bytes(); bytes.size() >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {
|
||||
// Skip the three BOM bytes; presumably yields a view of the remainder -- verify substring_view semantics.
bomless_input = input.substring_view(3);
|
||||
}
|
||||
|
||||
// The input is treated as already being UTF-8, so no further transcoding is done here.
return bomless_input;
|
||||
}
|
||||
|
||||
void UTF16BEDecoder::process(const StringView& input, Function<void(u32)> on_code_point)
|
||||
|
@ -222,8 +228,14 @@ void UTF16BEDecoder::process(const StringView& input, Function<void(u32)> on_cod
|
|||
|
||||
// Converts the input to a String by running it through process() and appending
// every reported code point to a StringBuilder. A leading byte order mark
// (bytes FE FF) is dropped first when present.
// NOTE(review): this span is a rendered diff without +/- markers. Per the hunk
// header (8 -> 14 lines), the two lines that use `input` directly (the first
// `StringBuilder builder(...)` and the first `process(...)`) look like the
// removed pre-change lines -- confirm against the repository.
String UTF16BEDecoder::to_utf8(const StringView& input)
|
||||
{
|
||||
StringBuilder builder(input.length() / 2);
|
||||
process(input, [&builder](u32 c) { builder.append_code_point(c); });
|
||||
// Discard the BOM
|
||||
auto bomless_input = input;
|
||||
// The size check comes first so the two indexed byte reads stay in bounds.
if (auto bytes = input.bytes(); bytes.size() >= 2 && bytes[0] == 0xFE && bytes[1] == 0xFF) {
|
||||
// Skip the two BOM bytes; presumably yields a view of the remainder -- verify substring_view semantics.
bomless_input = input.substring_view(2);
|
||||
}
|
||||
|
||||
// Presumably the argument is an initial capacity hint (half the input byte length) -- TODO confirm.
StringBuilder builder(bomless_input.length() / 2);
|
||||
// process() invokes the callback once per code point; each one is appended to the builder.
process(bomless_input, [&builder](u32 c) { builder.append_code_point(c); });
|
||||
return builder.to_string();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue