LibIMAP: Handle invalid escape sequences in Quoted-Printable parser

When an invalid escape sequence is encountered, we now output the
characters unaltered instead of crashing.
This commit is contained in:
Tim Ledbetter 2023-10-05 18:42:27 +01:00 committed by Andreas Kling
parent debb4edcc8
commit 80238f29d2
Notes: sideshowbarker 2024-07-17 10:54:57 +09:00
2 changed files with 37 additions and 12 deletions

View file

@ -46,4 +46,17 @@ TEST_CASE(test_decode)
auto illegal_character_decode = MUST(IMAP::decode_quoted_printable(illegal_character_builder.to_deprecated_string()));
EXPECT(illegal_character_decode.is_empty());
// If an escape sequence is invalid the characters are output unaltered. Illegal characters are ignored as usual.
decode_equal("="sv, "="sv);
decode_equal("=Z"sv, "=Z"sv);
decode_equal("=\x7F"sv, "="sv);
decode_equal("=\x7F\x7F"sv, "="sv);
decode_equal("=A\x7F"sv, "=A"sv);
decode_equal("=A"sv, "=A"sv);
decode_equal("=AZ"sv, "=AZ"sv);
decode_equal("=\r"sv, "=\r"sv);
decode_equal("=\r\r"sv, "=\r\r"sv);
decode_equal("=\n\r"sv, "=\n\r"sv);
decode_equal("=\rA"sv, "=\rA"sv);
}

View file

@ -22,6 +22,18 @@ ErrorOr<ByteBuffer> decode_quoted_printable(StringView input)
GenericLexer lexer(input);
StringBuilder output;
// For any invalid escape sequence. RFC 2045 says a reasonable solution is just to append '=' followed by the unaltered escape characters.
auto append_invalid_escape_sequence = [&](Optional<char> first = {}, Optional<char> second = {}) -> ErrorOr<void> {
TRY(output.try_append('='));
if (first.has_value() && !is_illegal_character(first.value()))
TRY(output.try_append(first.value()));
if (second.has_value() && !is_illegal_character(second.value()))
TRY(output.try_append(second.value()));
return {};
};
// NOTE: The RFC says that encoded lines must not be longer than 76 characters.
// However, the RFC says implementations can ignore this and parse as is,
// which is the approach we're taking.
@ -34,7 +46,8 @@ ErrorOr<ByteBuffer> decode_quoted_printable(StringView input)
if (potential_character == '=') {
if (lexer.is_eof()) {
TODO();
TRY(append_invalid_escape_sequence());
continue;
}
char first_escape_character = lexer.consume();
@ -43,7 +56,8 @@ ErrorOr<ByteBuffer> decode_quoted_printable(StringView input)
// Thus we can use is_ascii_hex_digit.
if (is_ascii_hex_digit(first_escape_character)) {
if (lexer.is_eof()) {
TODO();
TRY(append_invalid_escape_sequence(first_escape_character));
continue;
}
char second_escape_character = lexer.consume();
@ -52,11 +66,13 @@ ErrorOr<ByteBuffer> decode_quoted_printable(StringView input)
u8 actual_character = (parse_ascii_hex_digit(first_escape_character) << 4) | parse_ascii_hex_digit(second_escape_character);
TRY(output.try_append(actual_character));
} else {
TODO();
TRY(append_invalid_escape_sequence(first_escape_character, second_escape_character));
continue;
}
} else if (first_escape_character == '\r') {
if (lexer.is_eof()) {
TODO();
TRY(append_invalid_escape_sequence(first_escape_character));
continue;
}
char second_escape_character = lexer.consume();
@ -64,16 +80,12 @@ ErrorOr<ByteBuffer> decode_quoted_printable(StringView input)
if (second_escape_character == '\n') {
// This is a soft line break. Don't append anything to the output.
} else {
TODO();
TRY(append_invalid_escape_sequence(first_escape_character, second_escape_character));
continue;
}
} else {
if (is_illegal_character(first_escape_character)) {
TODO();
}
// Invalid escape sequence. RFC 2045 says a reasonable solution is just to append '=' followed by the character.
TRY(output.try_append('='));
TRY(output.try_append(first_escape_character));
TRY(append_invalid_escape_sequence(first_escape_character));
continue;
}
} else {
TRY(output.try_append(potential_character));