1 år sedan · 077a8058c3
--- a/Meta/Lagom/CMakeLists.txt
+++ b/Meta/Lagom/CMakeLists.txt
@@ -658,6 +658,7 @@ if (BUILD_LAGOM)
 
				             LibCompress
			
 
				             LibGL
			
 
				             LibGfx
			
 
				+            LibIMAP
			
 
				             LibLocale
			
 
				             LibMarkdown
			
 
				             LibPDF
			
--- a/Tests/LibIMAP/CMakeLists.txt
+++ b/Tests/LibIMAP/CMakeLists.txt
@@ -1,5 +1,6 @@
 
				 set(TEST_SOURCES
			
 
				     TestQuotedPrintable.cpp
			
 
				+    TestMessageHeaderEncoding.cpp
			
 
				 )
			
 
				 
			
 
				 foreach(source IN LISTS TEST_SOURCES)
			
--- a/Tests/LibIMAP/TestMessageHeaderEncoding.cpp
+++ b/Tests/LibIMAP/TestMessageHeaderEncoding.cpp
@@ -0,0 +1,55 @@
 
				+/*
			
 
				+ * Copyright (c) 2023, Valtteri Koskivuori <vkoskiv@gmail.com>
			
 
				+ *
			
 
				+ * SPDX-License-Identifier: BSD-2-Clause
			
 
				+ */
			
 
				+
			
 
				+#include <AK/CharacterTypes.h>
			
 
				+#include <LibIMAP/MessageHeaderEncoding.h>
			
 
				+#include <LibTest/TestCase.h>
			
 
				+
			
 
				+TEST_CASE(test_decode)
				+{
				+    // One row per check: the raw header text and what it must decode to.
				+    struct Sample {
				+        StringView raw;
				+        StringView plain;
				+    };
				+
				+    // More examples from the RFC document, a nice mix of different charsets & encodings.
				+    auto const mixed_headers_raw = "From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>"
				+                                   "To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>"
				+                                   "CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>"
				+                                   "Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?="
				+                                   "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="sv;
				+    auto const mixed_headers_plain = "From: Keith Moore <moore@cs.utk.edu>"
				+                                     "To: Keld Jørn Simonsen <keld@dkuug.dk>"
				+                                     "CC: André Pirard <PIRARD@vm1.ulg.ac.be>"
				+                                     "Subject: If you can read this you understand the example."sv;
				+
				+    Sample const samples[] = {
				+        // Underscores should end up as spaces
				+        { "=?utf-8?Q?Spaces_should_be_spaces_!?="sv, "Spaces should be spaces !"sv },
				+
				+        // RFC 2047 Section 8 "Examples", https://datatracker.ietf.org/doc/html/rfc2047#section-8
				+        { "=?ISO-8859-1?Q?a?="sv, "a"sv },
				+        { "=?ISO-8859-1?Q?a?= b"sv, "a b"sv },
				+
				+        // White space between adjacent 'encoded-word's is not displayed.
				+        { "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="sv, "ab"sv },
				+
				+        // Even multiple SPACEs between 'encoded-word's are ignored for the purpose of display.
				+        { "=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?="sv, "ab"sv },
				+        { "=?ISO-8859-1?Q?a?=        =?ISO-8859-1?Q?b?=    =?ISO-8859-1?Q?c?==?ISO-8859-1?Q?d?="sv, "abcd"sv },
				+
				+        // Any amount of linear-space-white between 'encoded-word's, even if it includes a CRLF followed by one or more SPACEs, is ignored for the purposes of display.
				+        { "=?utf-8?Q?a?=\r\n=?utf-8?Q?b?=    \r\n=?utf-8?Q?c?=\r\n      =?utf-8?Q?d?="sv, "abcd"sv },
				+
				+        // In order to cause a SPACE to be displayed within a portion of encoded text, the SPACE MUST be encoded as part of the 'encoded-word'.
				+        { "=?ISO-8859-1?Q?a_b?="sv, "a b"sv },
				+
				+        // In order to cause a SPACE to be displayed between two strings of encoded text, the SPACE MAY be encoded as part of one of the 'encoded-word's.
				+        { "=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?="sv, "a b"sv },
				+
				+        // The multi-header sample assembled above.
				+        { mixed_headers_raw, mixed_headers_plain },
				+    };
				+
				+    // Decode every sample in order and compare the bytes against the expectation.
				+    for (auto const& sample : samples) {
				+        StringView const expected = sample.plain;
				+        auto decoded = MUST(IMAP::decode_rfc2047_encoded_words(sample.raw));
				+        EXPECT_EQ(StringView(decoded), StringView(expected));
				+    }
				+}
			
--- a/Userland/Applications/Mail/MailWidget.cpp
+++ b/Userland/Applications/Mail/MailWidget.cpp
@@ -20,6 +20,7 @@
 
				 #include <LibGUI/Statusbar.h>
			
 
				 #include <LibGUI/TableView.h>
			
 
				 #include <LibGUI/TreeView.h>
			
 
				+#include <LibIMAP/MessageHeaderEncoding.h>
			
 
				 #include <LibIMAP/QuotedPrintable.h>
			
 
				 
			
 
				 MailWidget::MailWidget()
			
@@ -396,6 +397,10 @@ void MailWidget::selected_mailbox()
 
				         if (subject.is_empty())
			
 
				             subject = "(No subject)";
			
 
				 
			
 
				+        if (subject.contains("=?"sv) && subject.contains("?="sv)) {
			
 
				+            subject = MUST(IMAP::decode_rfc2047_encoded_words(subject));
			
 
				+        }
			
 
				+
			
 
				         auto& from_iterator_value = from_iterator->get<1>().value();
			
 
				         auto from_index = from_iterator_value.find("From:"sv);
			
 
				         if (!from_index.has_value())
			
--- a/Userland/Libraries/LibIMAP/CMakeLists.txt
+++ b/Userland/Libraries/LibIMAP/CMakeLists.txt
@@ -3,9 +3,10 @@ set(SOURCES
 
				     Objects.cpp
			
 
				     Parser.cpp
			
 
				     QuotedPrintable.cpp
			
 
				+    MessageHeaderEncoding.cpp
			
 
				 )
			
 
				 
			
 
				 set(GENERATED_SOURCES)
			
 
				 
			
 
				 serenity_lib(LibIMAP imap)
			
 
				-target_link_libraries(LibIMAP PRIVATE LibCore LibCrypto LibTLS)
			
 
				+target_link_libraries(LibIMAP PRIVATE LibCore LibCrypto LibTextCodec LibTLS)
			
--- a/Userland/Libraries/LibIMAP/MessageHeaderEncoding.cpp
+++ b/Userland/Libraries/LibIMAP/MessageHeaderEncoding.cpp
@@ -0,0 +1,97 @@
 
				+/*
			
 
				+ * Copyright (c) 2023, Valtteri Koskivuori <vkoskiv@gmail.com>
			
 
				+ *
			
 
				+ * SPDX-License-Identifier: BSD-2-Clause
			
 
				+ */
			
 
				+
			
 
				+#include "QuotedPrintable.h"
			
 
				+#include <AK/Base64.h>
			
 
				+#include <AK/GenericLexer.h>
			
 
				+#include <AK/StringBuilder.h>
			
 
				+#include <LibIMAP/MessageHeaderEncoding.h>
			
 
				+#include <LibTextCodec/Decoder.h>
			
 
				+
			
 
				+namespace IMAP {
			
 
				+
			
 
				+// Appends text that is not part of an 'encoded-word' to `output`, with every
				+// CR and LF replaced by a single space each.
				+static ErrorOr<void> append_unencoded_text(StringBuilder& output, StringView text)
				+{
				+    DeprecatedString flattened = text.replace("\r"sv, " "sv, ReplaceMode::All);
				+    flattened = flattened.replace("\n"sv, " "sv, ReplaceMode::All);
				+    TRY(output.try_append(flattened));
				+    return {};
				+}
				+
				+// Decodes every RFC 2047 'encoded-word' ("=?charset?encoding?encoded-text?=")
				+// found in `input` and returns the result as UTF-8 bytes.
				+//
				+// - Text outside of encoded-words is copied through (CR/LF become spaces).
				+// - The "Q" (quoted-printable variant) and "B" (base64) encodings are
				+//   supported, case-insensitively.
				+// - White space that only separates two adjacent encoded-words is dropped.
				+// - An encoded-word that cannot be decoded (bad payload, unknown encoding,
				+//   unknown charset, empty payload) is skipped; a debug message is logged.
				+// - A truncated encoded-word (missing '?' separators or the closing "?=")
				+//   is copied through as plain text instead of being parsed.
				+//
				+// Errors are only returned for allocation failures and for failures reported
				+// by the charset decoder's to_utf8().
				+ErrorOr<ByteBuffer> decode_rfc2047_encoded_words(StringView input)
				+{
				+    GenericLexer lexer(input);
				+    StringBuilder output;
				+
				+    // RFC 822 linear white space is SPACE / HTAB, optionally folded with CRLF.
				+    auto is_linear_whitespace = [](char c) {
				+        return c == ' ' || c == '\t' || c == '\r' || c == '\n';
				+    };
				+
				+    while (!lexer.is_eof()) {
				+        TRY(append_unencoded_text(output, lexer.consume_until("=?"sv)));
				+        if (lexer.is_eof())
				+            break;
				+
				+        // Remember where this word starts so it can be passed through verbatim if it is malformed.
				+        auto const word_start = lexer.tell();
				+        lexer.consume_specific("=?"sv);
				+        auto charset = lexer.consume_until('?');
				+        bool well_formed = lexer.consume_specific('?');
				+        auto encoding = lexer.consume_until('?');
				+        well_formed = lexer.consume_specific('?') && well_formed;
				+        auto encoded_text = lexer.consume_until("?="sv);
				+        well_formed = lexer.consume_specific("?="sv) && well_formed;
				+
				+        if (!well_formed) {
				+            // A delimiter was missing, so the lexer ran to the end of the input.
				+            // Blindly consuming past EOF would trip the lexer's assertion, so treat
				+            // the remainder as ordinary text instead.
				+            dbgln("Malformed rfc2047 encoded-word found, passing it through undecoded.");
				+            TRY(append_unencoded_text(output, input.substring_view(word_start)));
				+            break;
				+        }
				+
				+        // RFC 2047 Section 6.2, "...any 'linear-white-space' that separates a pair of adjacent 'encoded-word's is ignored."
				+        // https://datatracker.ietf.org/doc/html/rfc2047#section-6.2
				+        size_t whitespace_count = 0;
				+        while (is_linear_whitespace(lexer.peek(whitespace_count)))
				+            ++whitespace_count;
				+        bool const next_word_follows = lexer.peek(whitespace_count) == '=' && lexer.peek(whitespace_count + 1) == '?';
				+        if (whitespace_count > 0 && next_word_follows)
				+            lexer.ignore(whitespace_count);
				+
				+        ByteBuffer first_pass_decoded;
				+        if (encoding == 'Q' || encoding == 'q') {
				+            // RFC 2047 Section 4.2 (2), https://datatracker.ietf.org/doc/html/rfc2047#section-4.2
				+            // '_' always stands for 0x20. Substitute it *before* decoding, so that a
				+            // literal underscore sent as "=5F" is not turned into a space afterwards.
				+            DeprecatedString const quoted_printable_text = encoded_text.replace("_"sv, "=20"sv, ReplaceMode::All);
				+            auto maybe_decoded_data = decode_quoted_printable(quoted_printable_text);
				+            if (maybe_decoded_data.is_error()) {
				+                dbgln("Failed to decode quoted-printable rfc2047 text, skipping.");
				+                continue;
				+            }
				+            first_pass_decoded = maybe_decoded_data.release_value();
				+        } else if (encoding == 'B' || encoding == 'b') {
				+            auto maybe_decoded_data = AK::decode_base64(encoded_text);
				+            if (maybe_decoded_data.is_error()) {
				+                dbgln("Failed to decode base64-encoded rfc2047 text, skipping.");
				+                continue;
				+            }
				+            first_pass_decoded = maybe_decoded_data.release_value();
				+        } else {
				+            dbgln("Unknown encoding \"{}\" found, skipping, original string: \"{}\"", encoding, input);
				+            continue;
				+        }
				+        if (first_pass_decoded.is_empty())
				+            continue;
				+
				+        // The payload is still in the word's declared charset; convert it to UTF-8.
				+        auto maybe_decoder = TextCodec::decoder_for(charset);
				+        if (!maybe_decoder.has_value()) {
				+            dbgln("No decoder found for charset \"{}\", skipping.", charset);
				+            continue;
				+        }
				+        auto decoded_text = TRY(maybe_decoder->to_utf8(first_pass_decoded));
				+        TRY(output.try_append(decoded_text));
				+    }
				+
				+    return output.to_byte_buffer();
				+}
			
 
				+
			
 
				+}
			
--- a/Userland/Libraries/LibIMAP/MessageHeaderEncoding.h
+++ b/Userland/Libraries/LibIMAP/MessageHeaderEncoding.h
@@ -0,0 +1,15 @@
 
				+/*
			
 
				+ * Copyright (c) 2023, Valtteri Koskivuori <vkoskiv@gmail.com>
			
 
				+ *
			
 
				+ * SPDX-License-Identifier: BSD-2-Clause
			
 
				+ */
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+#include <AK/ByteBuffer.h>
			
 
				+
			
 
				+namespace IMAP {
			
 
				+
			
 
				+ErrorOr<ByteBuffer> decode_rfc2047_encoded_words(StringView input); // Decodes RFC 2047 "=?charset?encoding?text?=" encoded-words (Q and B encodings) in `input`, copies other text through, and returns the result as UTF-8 bytes; encoded-words that fail to decode are skipped.
			
 
				+
			
 
				+}