QuotedPrintable.cpp 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. /*
  2. * Copyright (c) 2021, Luke Wilde <lukew@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/CharacterTypes.h>
  7. #include <AK/GenericLexer.h>
  8. #include <AK/StringBuilder.h>
  9. #include <LibIMAP/QuotedPrintable.h>
  10. namespace IMAP {
  11. static constexpr bool is_illegal_character(char c)
  12. {
  13. return (u8)c > 0x7E || (is_ascii_control(c) && c != '\t' && c != '\r' && c != '\n');
  14. }
  15. // RFC 2045 Section 6.7 "Quoted-Printable Content-Transfer-Encoding", https://datatracker.ietf.org/doc/html/rfc2045#section-6.7
  16. ErrorOr<ByteBuffer> decode_quoted_printable(StringView input)
  17. {
  18. GenericLexer lexer(input);
  19. StringBuilder output;
  20. // NOTE: The RFC says that encoded lines must not be longer than 76 characters.
  21. // However, the RFC says implementations can ignore this and parse as is,
  22. // which is the approach we're taking.
  23. while (!lexer.is_eof()) {
  24. char potential_character = lexer.consume();
  25. if (is_illegal_character(potential_character))
  26. continue;
  27. if (potential_character == '=') {
  28. if (lexer.is_eof()) {
  29. TODO();
  30. }
  31. char first_escape_character = lexer.consume();
  32. // The RFC doesn't formally allow lowercase, but says implementations can treat lowercase the same as uppercase.
  33. // Thus we can use is_ascii_hex_digit.
  34. if (is_ascii_hex_digit(first_escape_character)) {
  35. if (lexer.is_eof()) {
  36. TODO();
  37. }
  38. char second_escape_character = lexer.consume();
  39. if (is_ascii_hex_digit(second_escape_character)) {
  40. u8 actual_character = (parse_ascii_hex_digit(first_escape_character) << 4) | parse_ascii_hex_digit(second_escape_character);
  41. TRY(output.try_append(actual_character));
  42. } else {
  43. TODO();
  44. }
  45. } else if (first_escape_character == '\r') {
  46. if (lexer.is_eof()) {
  47. TODO();
  48. }
  49. char second_escape_character = lexer.consume();
  50. if (second_escape_character == '\n') {
  51. // This is a soft line break. Don't append anything to the output.
  52. } else {
  53. TODO();
  54. }
  55. } else {
  56. if (is_illegal_character(first_escape_character)) {
  57. TODO();
  58. }
  59. // Invalid escape sequence. RFC 2045 says a reasonable solution is just to append '=' followed by the character.
  60. TRY(output.try_append('='));
  61. TRY(output.try_append(first_escape_character));
  62. }
  63. } else {
  64. TRY(output.try_append(potential_character));
  65. }
  66. }
  67. return output.to_byte_buffer();
  68. }
  69. }