//! # Get message as HTML.
//!
//! Use `Message.has_html()` to check if the UI shall render a
//! corresponding button and `MsgId.get_html()` to get the full message.
//!
//! Even when the original mime-message is not HTML,
//! `MsgId.get_html()` will return HTML -
//! this allows nice quoting, handling linebreaks properly etc.

use futures::future::FutureExt;
use std::future::Future;
use std::pin::Pin;

use lettre_email::mime::{self, Mime};

use crate::context::Context;
use crate::error::Result;
use crate::headerdef::{HeaderDef, HeaderDefMap};
use crate::message::{Message, MsgId};
use crate::mimeparser::parse_message_id;
use crate::plaintext::PlainText;
use mailparse::ParsedContentType;

impl Message {
    /// Check if the message can be retrieved as HTML.
    ///
    /// Typically, this is the case when the mime structure of a message is modified,
    /// meaning that some text is cut or the original message
    /// is in HTML and `simplify()` may hide some maybe important information.
    ///
    /// The corresponding ffi-function is `dc_msg_has_html()`.
    /// To get the HTML-code of the message, use `MsgId.get_html()`.
    pub fn has_html(&self) -> bool {
        self.mime_modified
    }
}

/// Type defining a rough mime-type.
///
/// This is mainly useful while iterating over mime-parts
/// to decide whether a mime-part has subparts.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MimeMultipartType {
    /// A `multipart/*` part that declares a `boundary` parameter.
    Multiple,
    /// Any part that is neither `Multiple` nor `Message`.
    Single,
    /// A `message/rfc822` part.
    Message,
}

/// Takes the content type of a ParsedMail structure
/// and returns the rough mime-type.
///
/// The mime-type is compared case-insensitively (it is lowercased first).
/// A part only counts as `Multiple` if its mime-type starts with `multipart`
/// and a `boundary` parameter is present; `message/rfc822` maps to `Message`;
/// everything else is `Single`.
///
/// The function performs no awaiting itself; it stays `async`
/// because its callers `.await` the result.
async fn get_mime_multipart_type(ctype: &ParsedContentType) -> MimeMultipartType {
    let mimetype = ctype.mimetype.to_lowercase();
    if mimetype.starts_with("multipart") && ctype.params.contains_key("boundary") {
        MimeMultipartType::Multiple
    } else if mimetype == "message/rfc822" {
        MimeMultipartType::Message
    } else {
        MimeMultipartType::Single
    }
}

/// HtmlMsgParser converts a mime-message to HTML.
#[derive(Debug)] struct HtmlMsgParser { pub html: String, pub plain: Option, } impl HtmlMsgParser { /// Function takes a raw mime-message string, /// searches for the main-text part /// and returns that as parser.html pub async fn from_bytes(context: &Context, rawmime: &[u8]) -> Result<Self> { let mut parser = HtmlMsgParser { html: "".to_string(), plain: None, }; let parsedmail = mailparse::parse_mail(rawmime)?; parser.collect_texts_recursive(context, &parsedmail).await?; if parser.html.is_empty() { if let Some(plain) = &parser.plain { parser.html = plain.to_html().await; } } else { parser.cid_to_data_recursive(context, &parsedmail).await?; } Ok(parser) } /// Function iterates over all mime-parts /// and searches for text/plain and text/html parts and saves the /// last one found /// in the corresponding structure fields. /// Usually, there is at most one plain-text and one HTML-text part. fn collect_texts_recursive<'a>( &'a mut self, context: &'a Context, mail: &'a mailparse::ParsedMail<'a>, ) -> Pin<Box<dyn Future<Output = Result<()>> + 'a + Send>> { // Boxed future to deal with recursion async move { match get_mime_multipart_type(&mail.ctype).await { MimeMultipartType::Multiple => { for cur_data in mail.subparts.iter() { self.collect_texts_recursive(context, cur_data).await? 
} Ok(()) } MimeMultipartType::Message => { let raw = mail.get_body_raw()?; if raw.is_empty() { return Ok(()); } let mail = mailparse::parse_mail(&raw).unwrap(); self.collect_texts_recursive(context, &mail).await } MimeMultipartType::Single => { let mimetype = mail.ctype.mimetype.parse::<Mime>()?; if mimetype == mime::TEXT_HTML { if let Ok(decoded_data) = mail.get_body() { self.html = decoded_data; return Ok(()); } } else if mimetype == mime::TEXT_PLAIN { if let Ok(decoded_data) = mail.get_body() { self.plain = Some(PlainText { text: decoded_data, flowed: if let Some(format) = mail.ctype.params.get("format") { format.as_str().to_ascii_lowercase() == "flowed" } else { false }, delsp: if let Some(delsp) = mail.ctype.params.get("delsp") { delsp.as_str().to_ascii_lowercase() == "yes" } else { false }, }); return Ok(()); } } Ok(()) } } } .boxed() } /// Replace cid:-protocol by the data:-protocol where appropriate. /// This allows the final html-file to be self-contained. fn cid_to_data_recursive<'a>( &'a mut self, context: &'a Context, mail: &'a mailparse::ParsedMail<'a>, ) -> Pin<Box<dyn Future<Output = Result<()>> + 'a + Send>> { // Boxed future to deal with recursion async move { match get_mime_multipart_type(&mail.ctype).await { MimeMultipartType::Multiple => { for cur_data in mail.subparts.iter() { self.cid_to_data_recursive(context, cur_data).await?; } Ok(()) } MimeMultipartType::Message => { let raw = mail.get_body_raw()?; if raw.is_empty() { return Ok(()); } let mail = mailparse::parse_mail(&raw).unwrap(); self.cid_to_data_recursive(context, &mail).await } MimeMultipartType::Single => { let mimetype = mail.ctype.mimetype.parse::<Mime>()?; if mimetype.type_() == mime::IMAGE { if let Some(cid) = mail.headers.get_header_value(HeaderDef::ContentId) { if let Ok(cid) = parse_message_id(&cid) { if let Ok(replacement) = mimepart_to_data_url(&mail).await { let re_string = format!( "(<img[^>]*src[^>]*=[^>]*)(cid:{})([^>]*>)", regex::escape(&cid) ); match 
regex::Regex::new(&re_string) { Ok(re) => { self.html = re .replace_all( &*self.html, format!("${{1}}{}${{3}}", replacement).as_str(), ) .as_ref() .to_string() } Err(e) => warn!( context, "Cannot create regex for cid: {} throws {}", re_string, e ), } } } } } Ok(()) } } } .boxed() } } /// Convert a mime part to a data: url as defined in [RFC 2397](https://tools.ietf.org/html/rfc2397). async fn mimepart_to_data_url(mail: &mailparse::ParsedMail<'_>) -> Result<String> { let data = mail.get_body_raw()?; let data = base64::encode(&data); Ok(format!("data:{};base64,{}", mail.ctype.mimetype, data)) } impl MsgId { /// Get HTML from a message-id. /// This requires `mime_headers` field to be set for the message; /// this is the case at least when `Message.has_html()` returns true /// (we do not save raw mime unconditionally in the database to save space). /// The corresponding ffi-function is `dc_get_msg_html()`. pub async fn get_html(self, context: &Context) -> String { let rawmime: Option<String> = context .sql .query_get_value( context, "SELECT mime_headers FROM msgs WHERE id=?;", paramsv![self], ) .await; if let Some(rawmime) = rawmime { match HtmlMsgParser::from_bytes(context, rawmime.as_bytes()).await { Err(err) => format!("parser error: {}", err), Ok(parser) => parser.html, } } else { format!("parser error: no mime for {}", self) } } } #[cfg(test)] mod tests { use super::*; use crate::test_utils::*; #[async_std::test] async fn test_htmlparse_plain_unspecified() { let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/text_plain_unspecified.eml"); let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert_eq!( parser.html, r##"<!DOCTYPE html> <html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body> This message does not have Content-Type nor Subject.<br/> <br/> </body></html> "## ); } #[async_std::test] async fn test_htmlparse_plain_iso88591() { let t = TestContext::new().await; let raw = 
include_bytes!("../test-data/message/text_plain_iso88591.eml"); let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert_eq!( parser.html, r##"<!DOCTYPE html> <html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body> message with a non-UTF-8 encoding: äöüßÄÖÜ<br/> <br/> </body></html> "## ); } #[async_std::test] async fn test_htmlparse_plain_flowed() { let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/text_plain_flowed.eml"); let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert!(parser.plain.unwrap().flowed); assert_eq!( parser.html, r##"<!DOCTYPE html> <html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body> This line ends with a space and will be merged with the next one due to format=flowed.<br/> <br/> This line does not end with a space<br/> and will be wrapped as usual.<br/> <br/> </body></html> "## ); } #[async_std::test] async fn test_htmlparse_alt_plain() { let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/text_alt_plain.eml"); let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert_eq!( parser.html, r##"<!DOCTYPE html> <html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body> mime-modified should not be set set as there is no html and no special stuff;<br/> although not being a delta-message.<br/> test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x27; :)<br/> <br/> <br/> </body></html> "## ); } #[async_std::test] async fn test_htmlparse_html() { let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/text_html.eml"); let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); // on windows, `\r\n` linends are returned from mimeparser, // however, rust multiline-strings use just `\n`; // therefore, we just remove `\r` before comparison. 
assert_eq!( parser.html.replace("\r", ""), r##" <html> <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p> </html>"## ); } #[async_std::test] async fn test_htmlparse_alt_html() { let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/text_alt_html.eml"); let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert_eq!( parser.html.replace("\r", ""), // see comment in test_htmlparse_html() r##"<html> <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p> </html> "## ); } #[async_std::test] async fn test_htmlparse_alt_plain_html() { let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml"); let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert_eq!( parser.html.replace("\r", ""), // see comment in test_htmlparse_html() r##"<html> <p> this is <b>html</b> </p> </html> "## ); } #[async_std::test] async fn test_htmlparse_apple_cid_jpg() { // load raw mime html-data with related image-part (cid:) // and make sure, Content-Id has angle-brackets that are removed correctly. let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/apple_cid_jpg.eml"); let test = String::from_utf8_lossy(raw); assert!(test .find("Content-Id: <8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box>") .is_some()); assert!(test .find("cid:8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box") .is_some()); assert!(test.find("data:").is_none()); // parsing converts cid: to data: let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert!(parser.html.find("<html>").is_some()); assert!(parser.html.find("Content-Id:").is_none()); assert!(parser .html .find("data:image/jpeg;base64,/9j/4AAQ") .is_some()); assert!(parser.html.find("cid:").is_none()); } }