mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-26 17:40:27 +00:00
LibTextCodec: Support validating encoded inputs
This commit is contained in:
parent
ad4470bc39
commit
079c96376c
Notes:
sideshowbarker
2024-07-17 06:54:15 +09:00
Author: https://github.com/IdanHo Commit: https://github.com/SerenityOS/serenity/commit/079c96376c Pull-request: https://github.com/SerenityOS/serenity/pull/21968 Issue: https://github.com/SerenityOS/serenity/issues/21779
2 changed files with 55 additions and 0 deletions
|
@ -232,6 +232,12 @@ StringView get_output_encoding(StringView encoding)
|
|||
return encoding;
|
||||
}
|
||||
|
||||
// Default validation hook for decoders.
// The argument is unnamed and never inspected: every input is reported as valid.
// Decoders for encodings that cannot accept arbitrary byte sequences may override this.
bool Decoder::validate(StringView)
{
    constexpr bool every_input_is_valid = true;
    return every_input_is_valid;
}
|
||||
|
||||
ErrorOr<String> Decoder::to_utf8(StringView input)
|
||||
{
|
||||
StringBuilder builder(input.length());
|
||||
|
@ -247,6 +253,11 @@ ErrorOr<void> UTF8Decoder::process(StringView input, Function<ErrorOr<void>(u32)
|
|||
return {};
|
||||
}
|
||||
|
||||
// Reports whether `input` is accepted by Utf8View::validate().
bool UTF8Decoder::validate(StringView input)
{
    Utf8View view(input);
    return view.validate();
}
|
||||
|
||||
ErrorOr<String> UTF8Decoder::to_utf8(StringView input)
|
||||
{
|
||||
// Discard the BOM
|
||||
|
@ -299,6 +310,26 @@ ErrorOr<void> UTF16BEDecoder::process(StringView input, Function<ErrorOr<void>(u
|
|||
return {};
|
||||
}
|
||||
|
||||
// Checks that every surrogate code unit in big-endian UTF-16 `input` is correctly paired.
// Returns false if a surrogate is not a high surrogate immediately followed by a low
// surrogate; returns true otherwise.
// Only whole 16-bit code units are examined: when the input has an odd number of bytes,
// the final byte is not looked at.
bool UTF16BEDecoder::validate(StringView input)
{
    size_t const code_unit_count = input.length() / 2;

    // Assembles the code unit at `index`, most significant byte first.
    auto code_unit_at = [&](size_t index) -> u16 {
        auto const first_byte = static_cast<u8>(input[index * 2]);
        auto const second_byte = static_cast<u8>(input[index * 2 + 1]);
        return static_cast<u16>((first_byte << 8) | second_byte);
    };

    for (size_t index = 0; index < code_unit_count; ++index) {
        u16 const lead = code_unit_at(index);
        if (!is_unicode_surrogate(lead))
            continue;

        // A surrogate has to open a pair, so it must be a high surrogate...
        if (!Utf16View::is_high_surrogate(lead))
            return false;

        // ...and there must be another code unit after it...
        if (index + 1 == code_unit_count)
            return false;

        // ...which has to be a low surrogate.
        if (!Utf16View::is_low_surrogate(code_unit_at(index + 1)))
            return false;

        // Step over the low surrogate that was just checked.
        ++index;
    }

    return true;
}
|
||||
|
||||
ErrorOr<String> UTF16BEDecoder::to_utf8(StringView input)
|
||||
{
|
||||
// Discard the BOM
|
||||
|
@ -352,6 +383,26 @@ ErrorOr<void> UTF16LEDecoder::process(StringView input, Function<ErrorOr<void>(u
|
|||
return {};
|
||||
}
|
||||
|
||||
// Checks that every surrogate code unit in little-endian UTF-16 `input` is correctly paired.
// Returns false if a surrogate is not a high surrogate immediately followed by a low
// surrogate; returns true otherwise.
// Only whole 16-bit code units are examined: when the input has an odd number of bytes,
// the final byte is not looked at.
bool UTF16LEDecoder::validate(StringView input)
{
    size_t const code_unit_count = input.length() / 2;

    // Assembles the code unit at `index`, least significant byte first.
    auto code_unit_at = [&](size_t index) -> u16 {
        auto const first_byte = static_cast<u8>(input[index * 2]);
        auto const second_byte = static_cast<u8>(input[index * 2 + 1]);
        return static_cast<u16>(first_byte | (second_byte << 8));
    };

    for (size_t index = 0; index < code_unit_count; ++index) {
        u16 const lead = code_unit_at(index);
        if (!is_unicode_surrogate(lead))
            continue;

        // A surrogate has to open a pair, so it must be a high surrogate...
        if (!Utf16View::is_high_surrogate(lead))
            return false;

        // ...and there must be another code unit after it...
        if (index + 1 == code_unit_count)
            return false;

        // ...which has to be a low surrogate.
        if (!Utf16View::is_low_surrogate(code_unit_at(index + 1)))
            return false;

        // Step over the low surrogate that was just checked.
        ++index;
    }

    return true;
}
|
||||
|
||||
ErrorOr<String> UTF16LEDecoder::to_utf8(StringView input)
|
||||
{
|
||||
// Discard the BOM
|
||||
|
|
|
@ -18,6 +18,7 @@ namespace TextCodec {
|
|||
class Decoder {
|
||||
public:
|
||||
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) = 0;
|
||||
virtual bool validate(StringView);
|
||||
virtual ErrorOr<String> to_utf8(StringView);
|
||||
|
||||
protected:
|
||||
|
@ -27,18 +28,21 @@ protected:
|
|||
class UTF8Decoder final : public Decoder {
|
||||
public:
|
||||
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||
virtual bool validate(StringView) override;
|
||||
virtual ErrorOr<String> to_utf8(StringView) override;
|
||||
};
|
||||
|
||||
class UTF16BEDecoder final : public Decoder {
|
||||
public:
|
||||
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||
virtual bool validate(StringView) override;
|
||||
virtual ErrorOr<String> to_utf8(StringView) override;
|
||||
};
|
||||
|
||||
class UTF16LEDecoder final : public Decoder {
|
||||
public:
|
||||
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||
virtual bool validate(StringView) override;
|
||||
virtual ErrorOr<String> to_utf8(StringView) override;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue