improve text parts (#221)

This commit is contained in:
Clément DOUIN 2021-10-11 23:04:33 +02:00 committed by GitHub
parent 284929d5dc
commit d21778c35e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 127 additions and 177 deletions

23
Cargo.lock generated
View file

@ -369,7 +369,7 @@ dependencies = [
"chrono", "chrono",
"clap", "clap",
"env_logger", "env_logger",
"htmlescape", "html-escape",
"imap", "imap",
"imap-proto", "imap-proto",
"lettre", "lettre",
@ -400,6 +400,15 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "html-escape"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "816ea801a95538fc5f53c836697b3f8b64a9d664c4f0b91efe1fe7c92e4dbcb7"
dependencies = [
"utf8-width",
]
[[package]] [[package]]
name = "html5ever" name = "html5ever"
version = "0.25.1" version = "0.25.1"
@ -414,12 +423,6 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "htmlescape"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163"
[[package]] [[package]]
name = "httpdate" name = "httpdate"
version = "1.0.1" version = "1.0.1"
@ -1388,6 +1391,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf8-width"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7cf7d77f457ef8dfa11e4cd5933c5ddb5dc52a94664071951219a97710f0a32b"
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "0.8.2" version = "0.8.2"

View file

@ -12,7 +12,7 @@ atty = "0.2.14"
chrono = "0.4.19" chrono = "0.4.19"
clap = { version = "2.33.3", default-features = false, features = ["suggestions", "color"] } clap = { version = "2.33.3", default-features = false, features = ["suggestions", "color"] }
env_logger = "0.8.3" env_logger = "0.8.3"
htmlescape = "0.3.1" html-escape = "0.2.9"
imap = "3.0.0-alpha.4" imap = "3.0.0-alpha.4"
imap-proto = "0.14.3" imap-proto = "0.14.3"
# This commit includes the de/serialization of the ContentType # This commit includes the de/serialization of the ContentType

View file

@ -15,7 +15,7 @@ type Seq<'a> = &'a str;
type PageSize = usize; type PageSize = usize;
type Page = usize; type Page = usize;
type Mbox<'a> = Option<&'a str>; type Mbox<'a> = Option<&'a str>;
type Mime = String; type TextMime<'a> = &'a str;
type Raw = bool; type Raw = bool;
type All = bool; type All = bool;
type RawMsg<'a> = &'a str; type RawMsg<'a> = &'a str;
@ -30,7 +30,7 @@ pub enum Command<'a> {
Forward(Seq<'a>, AttachmentsPaths<'a>), Forward(Seq<'a>, AttachmentsPaths<'a>),
List(Option<PageSize>, Page), List(Option<PageSize>, Page),
Move(Seq<'a>, Mbox<'a>), Move(Seq<'a>, Mbox<'a>),
Read(Seq<'a>, Mime, Raw), Read(Seq<'a>, TextMime<'a>, Raw),
Reply(Seq<'a>, All, AttachmentsPaths<'a>), Reply(Seq<'a>, All, AttachmentsPaths<'a>),
Save(Mbox<'a>, RawMsg<'a>), Save(Mbox<'a>, RawMsg<'a>),
Search(Query, Option<PageSize>, Page), Search(Query, Option<PageSize>, Page),
@ -103,8 +103,8 @@ pub fn matches<'a>(m: &'a ArgMatches) -> Result<Option<Command<'a>>> {
debug!("read command matched"); debug!("read command matched");
let seq = m.value_of("seq").unwrap(); let seq = m.value_of("seq").unwrap();
trace!("seq: {}", seq); trace!("seq: {}", seq);
let mime = format!("text/{}", m.value_of("mime-type").unwrap()); let mime = m.value_of("mime-type").unwrap();
trace!("mime: {}", mime); trace!("text mime: {}", mime);
let raw = m.is_present("raw"); let raw = m.is_present("raw");
trace!("raw: {}", raw); trace!("raw: {}", raw);
return Ok(Some(Command::Read(seq, mime, raw))); return Ok(Some(Command::Read(seq, mime, raw)));

View file

@ -1,24 +1,24 @@
use ammonia; use ammonia;
use anyhow::{anyhow, Context, Error, Result}; use anyhow::{anyhow, Context, Error, Result};
use chrono::{DateTime, FixedOffset}; use chrono::{DateTime, FixedOffset};
use htmlescape; use html_escape;
use imap::types::Flag; use imap::types::Flag;
use lettre::message::{Attachment, MultiPart, SinglePart}; use lettre::message::{Attachment, MultiPart, SinglePart};
use regex::Regex; use regex::Regex;
use rfc2047_decoder; use rfc2047_decoder;
use serde::Serialize;
use std::{ use std::{
collections::HashSet,
convert::{TryFrom, TryInto}, convert::{TryFrom, TryInto},
fmt, fs, fs,
path::PathBuf, path::PathBuf,
}; };
use crate::{ use crate::{
config::Account, config::{Account, DEFAULT_SIG_DELIM},
domain::{ domain::{
imap::ImapServiceInterface, imap::ImapServiceInterface,
mbox::Mbox, mbox::Mbox,
msg::{msg_utils, Flags, Parts, TextHtmlPart, TextPlainPart, Tpl, TplOverride}, msg::{msg_utils, BinaryPart, Flags, Part, Parts, TextPlainPart, Tpl, TplOverride},
smtp::SmtpServiceInterface, smtp::SmtpServiceInterface,
}, },
output::OutputServiceInterface, output::OutputServiceInterface,
@ -28,8 +28,6 @@ use crate::{
}, },
}; };
use super::{BinaryPart, Part};
type Addr = lettre::message::Mailbox; type Addr = lettre::message::Mailbox;
/// Representation of a message. /// Representation of a message.
@ -66,39 +64,66 @@ impl Msg {
self.parts self.parts
.iter() .iter()
.filter_map(|part| match part { .filter_map(|part| match part {
Part::Binary(part) => Some(part.clone()), Part::Binary(part) => Some(part.to_owned()),
_ => None, _ => None,
}) })
.collect() .collect()
} }
pub fn join_text_plain_parts(&self) -> String { /// Fold string body from all plain text parts into a single string body. If no plain text
let text_parts = self /// parts are found, HTML parts are used instead. The result is sanitized (all HTML markup is
.parts /// removed).
.iter() pub fn fold_text_plain_parts(&self) -> String {
.filter_map(|part| match part { let (plain, html) = self.parts.iter().fold(
Part::TextPlain(part) => Some(part.content.to_owned()), (String::default(), String::default()),
_ => None, |(mut plain, mut html), part| {
}) match part {
.collect::<Vec<_>>() Part::TextPlain(part) => {
.join("\n\n"); let glue = if plain.is_empty() { "" } else { "\n\n" };
let text_parts = ammonia::Builder::new() plain.push_str(glue);
.tags(Default::default()) plain.push_str(&part.content);
.clean(&text_parts) }
.to_string(); Part::TextHtml(part) => {
let text_parts = match htmlescape::decode_html(&text_parts) { let glue = if html.is_empty() { "" } else { "\n\n" };
Ok(text_parts) => text_parts, html.push_str(glue);
Err(_) => text_parts, html.push_str(&part.content);
}
_ => (),
}; };
text_parts (plain, html)
},
);
if plain.is_empty() {
// Remove HTML markup
let sanitized_html = ammonia::Builder::new()
.tags(HashSet::default())
.clean(&html)
.to_string();
// Replace `&nbsp;` by regular space
let sanitized_html = Regex::new(r"&nbsp;")
.unwrap()
.replace_all(&sanitized_html, " ")
.to_string();
// Merge new line chars
let sanitized_html = Regex::new(r"(\r?\n ?){2,}")
.unwrap()
.replace_all(&sanitized_html, "\n\n")
.to_string();
// Decode HTML entities
let sanitized_html = html_escape::decode_html_entities(&sanitized_html).to_string();
sanitized_html
} else {
plain
}
} }
pub fn join_text_html_parts(&self) -> String { /// Fold string body from all HTML parts into a single string body.
fn fold_text_html_parts(&self) -> String {
let text_parts = self let text_parts = self
.parts .parts
.iter() .iter()
.filter_map(|part| match part { .filter_map(|part| match part {
Part::TextPlain(part) => Some(part.content.to_owned()), Part::TextHtml(part) => Some(part.content.to_owned()),
_ => None, _ => None,
}) })
.collect::<Vec<_>>() .collect::<Vec<_>>()
@ -110,12 +135,13 @@ impl Msg {
text_parts text_parts
} }
pub fn join_text_parts(&self) -> String { /// Fold string body from all text parts into a single string body. The mime allows users to
let text_parts = self.join_text_plain_parts(); /// choose between plain text parts and html text parts.
if text_parts.is_empty() { pub fn fold_text_parts(&self, text_mime: &str) -> String {
self.join_text_html_parts() if text_mime == "html" {
self.fold_text_html_parts()
} else { } else {
text_parts self.fold_text_plain_parts()
} }
} }
@ -161,7 +187,7 @@ impl Msg {
self.subject = format!("Re: {}", self.subject); self.subject = format!("Re: {}", self.subject);
} }
// Text plain parts // Body
let plain_content = { let plain_content = {
let date = self let date = self
.date .date
@ -178,8 +204,8 @@ impl Msg {
let mut content = format!("\n\nOn {}, {} wrote:\n", date, sender); let mut content = format!("\n\nOn {}, {} wrote:\n", date, sender);
let mut glue = ""; let mut glue = "";
for line in self.join_text_plain_parts().trim().lines() { for line in self.fold_text_parts("plain").trim().lines() {
if line == "-- \n" { if line == DEFAULT_SIG_DELIM {
break; break;
} }
content.push_str(glue); content.push_str(glue);
@ -192,50 +218,7 @@ impl Msg {
content content
}; };
// Text HTML parts self.parts = Parts(vec![Part::new_text_plain(plain_content)]);
let html_content = {
let date = self
.date
.as_ref()
.map(|date| date.format("%d %b %Y, at %H:%M").to_string())
.unwrap_or("unknown date".into());
let sender = self
.reply_to
.as_ref()
.or(self.from.as_ref())
.and_then(|addrs| addrs.first())
.map(|addr| addr.name.to_owned().unwrap_or(addr.email.to_string()))
.unwrap_or("unknown sender".into());
let mut content = format!("\n\nOn {}, {} wrote:\n", date, sender);
let mut glue = "";
for line in self.join_text_html_parts().trim().lines() {
if line == "-- \n" {
break;
}
content.push_str(glue);
content.push_str(">");
content.push_str(if line.starts_with(">") { "" } else { " " });
content.push_str(line);
glue = "\n";
}
content
};
self.parts = Parts::default();
if !plain_content.is_empty() {
self.parts.push(Part::TextPlain(TextPlainPart {
content: plain_content,
}));
}
if !html_content.is_empty() {
self.parts.push(Part::TextHtml(TextHtmlPart {
content: html_content,
}));
}
Ok(self) Ok(self)
} }
@ -271,8 +254,7 @@ impl Msg {
self.subject = format!("Fwd: {}", self.subject); self.subject = format!("Fwd: {}", self.subject);
} }
// Text plain parts // Body
{
let mut content = String::default(); let mut content = String::default();
content.push_str("\n\n-------- Forwarded Message --------\n"); content.push_str("\n\n-------- Forwarded Message --------\n");
content.push_str(&format!("Subject: {}\n", prev_subject)); content.push_str(&format!("Subject: {}\n", prev_subject));
@ -300,44 +282,9 @@ impl Msg {
content.push_str("\n"); content.push_str("\n");
} }
content.push_str("\n"); content.push_str("\n");
content.push_str(&self.join_text_plain_parts()); content.push_str(&self.fold_text_parts("plain"));
self.parts self.parts
.replace_text_plain_parts_with(TextPlainPart { content }) .replace_text_plain_parts_with(TextPlainPart { content });
}
// Text HTML parts
{
let mut content = String::default();
content.push_str("\n\n-------- Forwarded Message --------\n");
content.push_str(&format!("Subject: {}\n", prev_subject));
if let Some(date) = prev_date {
content.push_str(&format!("Date: {}\n", date.to_rfc2822()));
}
if let Some(addrs) = prev_from.as_ref() {
content.push_str("From: ");
let mut glue = "";
for addr in addrs {
content.push_str(glue);
content.push_str(&addr.to_string());
glue = ", ";
}
content.push_str("\n");
}
if let Some(addrs) = prev_to.as_ref() {
content.push_str("To: ");
let mut glue = "";
for addr in addrs {
content.push_str(glue);
content.push_str(&addr.to_string());
glue = ", ";
}
content.push_str("\n");
}
content.push_str("\n");
content.push_str(&self.join_text_html_parts());
self.parts
.replace_text_html_parts_with(TextHtmlPart { content })
}
Ok(self) Ok(self)
} }
@ -628,7 +575,7 @@ impl TryInto<lettre::Message> for &Msg {
}; };
let mut multipart = let mut multipart =
MultiPart::mixed().singlepart(SinglePart::plain(self.join_text_plain_parts())); MultiPart::mixed().singlepart(SinglePart::plain(self.fold_text_plain_parts()));
for part in self.attachments() { for part in self.attachments() {
let filename = part.filename; let filename = part.filename;
@ -803,12 +750,3 @@ pub fn parse_some_addrs(addrs: &Option<Vec<imap_proto::Address>>) -> Result<Opti
None => None, None => None,
}) })
} }
#[derive(Debug, Serialize)]
pub struct PrintableMsg(pub String);
impl fmt::Display for PrintableMsg {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "{}", self.0)
}
}

View file

@ -25,8 +25,6 @@ use crate::{
output::OutputServiceInterface, output::OutputServiceInterface,
}; };
use super::PrintableMsg;
/// Download all attachments from the given message sequence number to the user account downloads /// Download all attachments from the given message sequence number to the user account downloads
/// directory. /// directory.
pub fn attachments<OutputService: OutputServiceInterface, ImapService: ImapServiceInterface>( pub fn attachments<OutputService: OutputServiceInterface, ImapService: ImapServiceInterface>(
@ -203,19 +201,18 @@ pub fn move_<OutputService: OutputServiceInterface, ImapService: ImapServiceInte
/// Read a message by its sequence number. /// Read a message by its sequence number.
pub fn read<OutputService: OutputServiceInterface, ImapService: ImapServiceInterface>( pub fn read<OutputService: OutputServiceInterface, ImapService: ImapServiceInterface>(
seq: &str, seq: &str,
// TODO: use the mime to select the right body text_mime: &str,
_mime: String,
raw: bool, raw: bool,
output: &OutputService, output: &OutputService,
imap: &mut ImapService, imap: &mut ImapService,
) -> Result<()> { ) -> Result<()> {
if raw { let msg = if raw {
let msg = String::from_utf8(imap.find_raw_msg(&seq)?)?; String::from_utf8(imap.find_raw_msg(&seq)?)?
output.print(PrintableMsg(msg))
} else { } else {
let msg = imap.find_msg(&seq)?.join_text_parts(); imap.find_msg(&seq)?.fold_text_parts(text_mime)
output.print(PrintableMsg(msg)) };
}
output.print(msg)
} }
/// Reply to the given message UID. /// Reply to the given message UID.

View file

@ -27,6 +27,12 @@ pub enum Part {
Binary(BinaryPart), Binary(BinaryPart),
} }
impl Part {
pub fn new_text_plain(content: String) -> Self {
Self::TextPlain(TextPlainPart { content })
}
}
#[derive(Debug, Clone, Default, Serialize)] #[derive(Debug, Clone, Default, Serialize)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct Parts(pub Vec<Part>); pub struct Parts(pub Vec<Part>);

View file

@ -83,7 +83,7 @@ impl Tpl {
if let Some(body) = opts.body { if let Some(body) = opts.body {
tpl.push_str(body); tpl.push_str(body);
} else { } else {
tpl.push_str(&msg.join_text_plain_parts()) tpl.push_str(&msg.fold_text_plain_parts())
} }
// Signature // Signature

View file

@ -111,8 +111,8 @@ fn main() -> Result<()> {
Some(msg_arg::Command::Move(seq, target)) => { Some(msg_arg::Command::Move(seq, target)) => {
return msg_handler::move_(seq, target, &output, &mut imap); return msg_handler::move_(seq, target, &output, &mut imap);
} }
Some(msg_arg::Command::Read(seq, mime, raw)) => { Some(msg_arg::Command::Read(seq, text_mime, raw)) => {
return msg_handler::read(seq, mime, raw, &output, &mut imap); return msg_handler::read(seq, text_mime, raw, &output, &mut imap);
} }
Some(msg_arg::Command::Reply(seq, all, atts)) => { Some(msg_arg::Command::Reply(seq, all, atts)) => {
return msg_handler::reply(seq, all, atts, &account, &output, &mut imap, &mut smtp); return msg_handler::reply(seq, all, atts, &account, &output, &mut imap, &mut smtp);