diff --git a/deltachat-ffi/deltachat.h b/deltachat-ffi/deltachat.h index 052f0a621..f340c7020 100644 --- a/deltachat-ffi/deltachat.h +++ b/deltachat-ffi/deltachat.h @@ -1447,6 +1447,54 @@ int dc_set_chat_mute_duration (dc_context_t* context, ui char* dc_get_msg_info (dc_context_t* context, uint32_t msg_id); +/** + * Get uncut message, if available. + * + * Delta Chat tries to break the message into simple parts such as plain text or images + * that are retrieved using dc_msg_get_viewtype(), dc_msg_get_text(), dc_msg_get_file() and so on. + * This works totally fine for Delta Chat to Delta Chat communication, + * however, when the counterpart uses another E-Mail-client, this has limits: + * + * - even if we do a good job of removing quotes, + * sometimes one needs to see them + * - HTML-only messages might lose information on conversion to text, + * esp. when there are lots of embedded images + * - even if there is some plain text part for a HTML-message, + * this is often poor and not nicely usable due to long links + * + * In these cases, dc_msg_has_html() returns 1 + * and you can ask dc_get_msg_html() for some HTML-code + * that shows the uncut text (which is close to the original). + * For simplicity, the function _always_ returns HTML-code, + * this removes the need for the UI + * to deal with different formatting options of PLAIN-parts. + * + * **Note:** The returned HTML-code may contain scripts, + * external images that may be misused as hidden read-receipts and so on. + * Taking care of these parts + * while maintaining compatibility with the then generated HTML-code + * is not easily doable, if at all. + * Eg. taking care of tags and attributes is not sufficient, + * we would have to deal with linked content (eg. script, css), + * text (eg. script-blocks) and values (eg. javascript-protocol) as well; + * on this level, we have to deal with encodings, browser peculiarities and so on - + * and would still risk overlooking something and breaking things. 
+ * + * To avoid starting this cat-and-mouse game, + * and to close this issue in a sustainable way, + * it is up to the UI to display the HTML-code in an **appropriate sandbox environment** - + * that may eg. be an external browser or a WebView with scripting disabled. + * + * @memberof dc_context_t + * @param context The context object. + * @param msg_id The message id for which the uncut text should be loaded. + * @return Uncut text as HTML. + * In case of errors, NULL is returned. + * The result must be released using dc_str_unref(). + */ +char* dc_get_msg_html (dc_context_t* context, uint32_t msg_id); + + /** * Get the raw mime-headers of the given message. * Raw headers are saved for incoming messages @@ -3603,6 +3651,32 @@ int dc_msg_get_videochat_type (const dc_msg_t* msg); #define DC_VIDEOCHATTYPE_JITSI 2 +/** + * Checks if the message has a full HTML version. + * + * Messages have a full HTML version + * if the original message _may_ contain important parts + * that are removed by some heuristics + * or if the message is just too long or too complex + * to get displayed properly by just using plain text. + * If so, the UI should offer a button such as + * "Show full message" that shows the uncut message using dc_get_msg_html(). + * + * Even if a "Show full message" button is recommended, + * the UI should display the text in the bubble + * using the normal dc_msg_get_text() function - + * which will still be fine in many cases. + * + * @memberof dc_msg_t + * @param msg The message object. + * @return 0=Message as displayed using dc_msg_get_text() is just fine; + * 1=The message has a full HTML version, + * should be displayed using dc_msg_get_text() + * and a button to show the full version should be offered. + */ +int dc_msg_has_html (dc_msg_t* msg); + + /** * Set the text of a message object. * This does not alter any information in the database; this may be done by dc_send_msg() later. 
diff --git a/deltachat-ffi/src/lib.rs b/deltachat-ffi/src/lib.rs index 4d3f3253c..dfac0da21 100644 --- a/deltachat-ffi/src/lib.rs +++ b/deltachat-ffi/src/lib.rs @@ -1389,6 +1389,20 @@ pub unsafe extern "C" fn dc_get_msg_info( block_on(message::get_msg_info(&ctx, MsgId::new(msg_id))).strdup() } +#[no_mangle] +pub unsafe extern "C" fn dc_get_msg_html( + context: *mut dc_context_t, + msg_id: u32, +) -> *mut libc::c_char { + if context.is_null() { + eprintln!("ignoring careless call to dc_get_msg_html()"); + return ptr::null_mut(); + } + let ctx = &*context; + + block_on(MsgId::new(msg_id).get_html(&ctx)).strdup() +} + #[no_mangle] pub unsafe extern "C" fn dc_get_mime_headers( context: *mut dc_context_t, @@ -2861,6 +2875,16 @@ pub unsafe extern "C" fn dc_msg_is_setupmessage(msg: *mut dc_msg_t) -> libc::c_i ffi_msg.message.is_setupmessage().into() } +#[no_mangle] +pub unsafe extern "C" fn dc_msg_has_html(msg: *mut dc_msg_t) -> libc::c_int { + if msg.is_null() { + eprintln!("ignoring careless call to dc_msg_has_html()"); + return 0; + } + let ffi_msg = &*msg; + ffi_msg.message.has_html().into() +} + #[no_mangle] pub unsafe extern "C" fn dc_msg_get_videochat_url(msg: *mut dc_msg_t) -> *mut libc::c_char { if msg.is_null() { diff --git a/examples/repl/cmdline.rs b/examples/repl/cmdline.rs index cf1657bee..03ab5a681 100644 --- a/examples/repl/cmdline.rs +++ b/examples/repl/cmdline.rs @@ -21,6 +21,7 @@ use deltachat::qr::*; use deltachat::sql; use deltachat::EventType; use deltachat::{config, provider}; +use std::fs; /// Reset database tables. /// Argument is a bitmask, executing single or multiple actions in one call. 
@@ -185,7 +186,7 @@ async fn log_msg(context: &Context, prefix: impl AsRef, msg: &Message) { let temp2 = dc_timestamp_to_str(msg.get_timestamp()); let msgtext = msg.get_text(); println!( - "{}{}{}{}: {} (Contact#{}): {} {}{}{}{}{} [{}]", + "{}{}{}{}: {} (Contact#{}): {} {}{}{}{}{}{} [{}]", prefix.as_ref(), msg.get_id(), if msg.get_showpadlock() { "🔒" } else { "" }, @@ -193,6 +194,7 @@ async fn log_msg(context: &Context, prefix: impl AsRef, msg: &Message) { &contact_name, contact_id, msgtext.unwrap_or_default(), + if msg.has_html() { "[HAS-HTML]️" } else { "" }, if msg.get_from_id() == 1 as libc::c_uint { "" } else if msg.get_state() == MessageState::InSeen { @@ -384,6 +386,7 @@ pub async fn cmdline(context: Context, line: &str, chat_id: &mut ChatId) -> Resu ===========================Message commands==\n\ listmsgs \n\ msginfo \n\ + html \n\ listfresh\n\ forward \n\ markseen \n\ @@ -942,6 +945,16 @@ pub async fn cmdline(context: Context, line: &str, chat_id: &mut ChatId) -> Resu let res = message::get_msg_info(&context, id).await; println!("{}", res); } + "html" => { + ensure!(!arg1.is_empty(), "Argument missing."); + let id = MsgId::new(arg1.parse()?); + let file = dirs::home_dir() + .unwrap_or_default() + .join(format!("msg-{}.html", id.to_u32())); + let html = id.get_html(&context).await; + fs::write(&file, html)?; + println!("HTML written to: {:#?}", file); + } "listfresh" => { let msglist = context.get_fresh_msgs().await; diff --git a/src/dc_receive_imf.rs b/src/dc_receive_imf.rs index 191712830..237ee48b1 100644 --- a/src/dc_receive_imf.rs +++ b/src/dc_receive_imf.rs @@ -802,11 +802,24 @@ async fn add_parts( let mut parts = std::mem::replace(&mut mime_parser.parts, Vec::new()); let server_folder = server_folder.as_ref().to_string(); let is_system_message = mime_parser.is_system_message; - let mime_headers = if save_mime_headers { + + // if indicated by the parser, + // we save the full mime-message and add a flag + // that the ui should show button to display 
the full message. + // + // (currently, we skip saving mime-messages for encrypted messages + // as there is probably no huge intersection between html-messages and encrypted messages, + // however, that should be doable, we need the decrypted mime-structure in this case) + + // a flag used to avoid adding "show full message" button to multiple parts of the message. + let mut save_mime_modified = mime_parser.is_mime_modified && !mime_parser.was_encrypted(); + + let mime_headers = if save_mime_headers || save_mime_modified { Some(String::from_utf8_lossy(imf_raw).to_string()) } else { None }; + let sent_timestamp = *sent_timestamp; let is_hidden = *hidden; let chat_id = *chat_id; @@ -826,8 +839,9 @@ async fn add_parts( "INSERT INTO msgs \ (rfc724_mid, server_folder, server_uid, chat_id, from_id, to_id, timestamp, \ timestamp_sent, timestamp_rcvd, type, state, msgrmsg, txt, txt_raw, param, \ - bytes, hidden, mime_headers, mime_in_reply_to, mime_references, error, ephemeral_timer, ephemeral_timestamp) \ - VALUES (?,?,?,?,?,?, ?,?,?,?,?,?, ?,?,?,?,?,?, ?,?, ?,?,?);", + bytes, hidden, mime_headers, mime_in_reply_to, mime_references, mime_modified, \ + error, ephemeral_timer, ephemeral_timestamp) \ + VALUES (?,?,?,?,?,?,?, ?,?,?,?,?,?,?,?, ?,?,?,?,?,?, ?,?,?);", )?; let is_location_kml = location_kml_is @@ -841,6 +855,12 @@ async fn add_parts( } } + let mime_modified = save_mime_modified && !part.msg.is_empty(); + if mime_modified { + // Avoid setting mime_modified for more than one part. 
+ save_mime_modified = false; + } + if part.typ == Viewtype::Text { let msg_raw = part.msg_raw.as_ref().cloned().unwrap_or_default(); txt_raw = format!("{}\n\n{}", subject, msg_raw); @@ -854,7 +874,9 @@ async fn add_parts( } else { match ephemeral_timer { EphemeralTimer::Disabled => 0, - EphemeralTimer::Enabled { duration } => rcvd_timestamp + i64::from(duration) + EphemeralTimer::Enabled { duration } => { + rcvd_timestamp + i64::from(duration) + } } }; @@ -877,9 +899,14 @@ async fn add_parts( part.param.to_string(), part.bytes as isize, is_hidden, - mime_headers, + if save_mime_headers || mime_modified { + mime_headers.clone() + } else { + None + }, mime_in_reply_to, mime_references, + mime_modified, part.error.take().unwrap_or_default(), ephemeral_timer, ephemeral_timestamp diff --git a/src/dehtml.rs b/src/dehtml.rs index 3464fef85..2eae9547a 100644 --- a/src/dehtml.rs +++ b/src/dehtml.rs @@ -390,9 +390,10 @@ mod tests { let input = include_str!("../test-data/message/gmx-quote-body.eml"); let dehtml = dehtml(input).unwrap(); println!("{}", dehtml); - let (msg, forwawded, top_quote) = simplify(dehtml, false); + let (msg, forwarded, cut, top_quote) = simplify(dehtml, false); assert_eq!(msg, "Test"); - assert_eq!(forwawded, false); + assert_eq!(forwarded, false); + assert_eq!(cut, false); assert_eq!(top_quote.as_deref(), Some("test")); } } diff --git a/src/headerdef.rs b/src/headerdef.rs index cadb300aa..f349df5b9 100644 --- a/src/headerdef.rs +++ b/src/headerdef.rs @@ -21,6 +21,7 @@ pub enum HeaderDef { InReplyTo, Precedence, ContentType, + ContentId, ChatVersion, ChatGroupId, ChatGroupName, diff --git a/src/html.rs b/src/html.rs new file mode 100644 index 000000000..d33d2c529 --- /dev/null +++ b/src/html.rs @@ -0,0 +1,396 @@ +//! # Get message as HTML. +//! +//! Use `Message.has_html()` to check if the UI shall render a +//! corresponding button and `MsgId.get_html()` to get the full message. +//! +//! Even when the original mime-message is not HTML, +//! 
`MsgId.get_html()` will return HTML - +//! this allows nice quoting, handling linebreaks properly etc. +use futures::future::FutureExt; +use std::future::Future; +use std::pin::Pin; + +use lettre_email::mime::{self, Mime}; + +use crate::context::Context; +use crate::error::Result; +use crate::headerdef::{HeaderDef, HeaderDefMap}; +use crate::message::{Message, MsgId}; +use crate::mimeparser::parse_message_id; +use crate::plaintext::PlainText; +use mailparse::ParsedContentType; + +impl Message { + /// Check if the message can be retrieved as HTML. + /// Typically, this is the case, when the mime structure of a Message is modified, + /// meaning that some text is cut or the original message + /// is in HTML and `simplify()` may hide some maybe important information. + /// The corresponding ffi-function is `dc_msg_has_html()`. + /// To get the HTML-code of the message, use `MsgId.get_html()`. + pub fn has_html(&self) -> bool { + self.mime_modified + } +} + +/// Type defining a rough mime-type. +/// This is mainly useful on iterating +/// to decide whether a mime-part has subtypes. +enum MimeMultipartType { + Multiple, + Single, + Message, +} + +/// Function takes a content type from a ParsedMail structure +/// and checks and returns the rough mime-type. +async fn get_mime_multipart_type(ctype: &ParsedContentType) -> MimeMultipartType { + let mimetype = ctype.mimetype.to_lowercase(); + if mimetype.starts_with("multipart") && ctype.params.get("boundary").is_some() { + MimeMultipartType::Multiple + } else if mimetype == "message/rfc822" { + MimeMultipartType::Message + } else { + MimeMultipartType::Single + } +} + +/// HtmlMsgParser converts a mime-message to HTML. 
+#[derive(Debug)] +struct HtmlMsgParser { + pub html: String, + pub plain: Option<PlainText>, +} + +impl HtmlMsgParser { + /// Function takes a raw mime-message string, + /// searches for the main-text part + /// and returns that as parser.html + pub async fn from_bytes(context: &Context, rawmime: &[u8]) -> Result<Self> { + let mut parser = HtmlMsgParser { + html: "".to_string(), + plain: None, + }; + + let parsedmail = mailparse::parse_mail(rawmime)?; + + parser.collect_texts_recursive(context, &parsedmail).await?; + + if parser.html.is_empty() { + if let Some(plain) = &parser.plain { + parser.html = plain.to_html().await; + } + } else { + parser.cid_to_data_recursive(context, &parsedmail).await?; + } + + Ok(parser) + } + + /// Function iterates over all mime-parts + /// and searches for text/plain and text/html parts and saves the + /// last one found + /// in the corresponding structure fields. + /// Usually, there is at most one plain-text and one HTML-text part; a malformed embedded message is returned as an error instead of panicking. + fn collect_texts_recursive<'a>( + &'a mut self, + context: &'a Context, + mail: &'a mailparse::ParsedMail<'a>, + ) -> Pin<Box<dyn Future<Output = Result<()>> + 'a + Send>> { + // Boxed future to deal with recursion + async move { + match get_mime_multipart_type(&mail.ctype).await { + MimeMultipartType::Multiple => { + for cur_data in mail.subparts.iter() { + self.collect_texts_recursive(context, cur_data).await? 
+ } + Ok(()) + } + MimeMultipartType::Message => { + let raw = mail.get_body_raw()?; + if raw.is_empty() { + return Ok(()); + } + let mail = mailparse::parse_mail(&raw)?; + self.collect_texts_recursive(context, &mail).await + } + MimeMultipartType::Single => { + let mimetype = mail.ctype.mimetype.parse::<Mime>()?; + if mimetype == mime::TEXT_HTML { + if let Ok(decoded_data) = mail.get_body() { + self.html = decoded_data; + return Ok(()); + } + } else if mimetype == mime::TEXT_PLAIN { + if let Ok(decoded_data) = mail.get_body() { + self.plain = Some(PlainText { + text: decoded_data, + flowed: if let Some(format) = mail.ctype.params.get("format") { + format.as_str().to_ascii_lowercase() == "flowed" + } else { + false + }, + delsp: if let Some(delsp) = mail.ctype.params.get("delsp") { + delsp.as_str().to_ascii_lowercase() == "yes" + } else { + false + }, + }); + return Ok(()); + } + } + Ok(()) + } + } + } + .boxed() + } + + /// Replace cid:-protocol by the data:-protocol where appropriate. + /// This allows the final html-file to be self-contained. 
+ fn cid_to_data_recursive<'a>( + &'a mut self, + context: &'a Context, + mail: &'a mailparse::ParsedMail<'a>, + ) -> Pin<Box<dyn Future<Output = Result<()>> + 'a + Send>> { + // Boxed future to deal with recursion; parse errors of embedded messages are propagated, not unwrapped + async move { + match get_mime_multipart_type(&mail.ctype).await { + MimeMultipartType::Multiple => { + for cur_data in mail.subparts.iter() { + self.cid_to_data_recursive(context, cur_data).await?; + } + Ok(()) + } + MimeMultipartType::Message => { + let raw = mail.get_body_raw()?; + if raw.is_empty() { + return Ok(()); + } + let mail = mailparse::parse_mail(&raw)?; + self.cid_to_data_recursive(context, &mail).await + } + MimeMultipartType::Single => { + let mimetype = mail.ctype.mimetype.parse::<Mime>()?; + if mimetype.type_() == mime::IMAGE { + if let Some(cid) = mail.headers.get_header_value(HeaderDef::ContentId) { + if let Ok(cid) = parse_message_id(&cid) { + if let Ok(replacement) = mimepart_to_data_url(&mail).await { + let re_string = format!( + "(<img[^>]*src[^>]*=[^>]*)(cid:{})([^>]*>)", + regex::escape(&cid) + ); + match regex::Regex::new(&re_string) { + Ok(re) => { + self.html = re + .replace_all( + &*self.html, + format!("${{1}}{}${{3}}", replacement).as_str(), + ) + .as_ref() + .to_string() + } + Err(e) => warn!( + context, + "Cannot create regex for cid: {} throws {}", + re_string, + e + ), + } + } + } + } + } + Ok(()) + } + } + } + .boxed() + } +} + +/// Convert a mime part to a data: url as defined in [RFC 2397](https://tools.ietf.org/html/rfc2397). +async fn mimepart_to_data_url(mail: &mailparse::ParsedMail<'_>) -> Result<String> { + let data = mail.get_body_raw()?; + let data = base64::encode(&data); + Ok(format!("data:{};base64,{}", mail.ctype.mimetype, data)) +} + +impl MsgId { + /// Get HTML from a message-id. 
+ /// This requires `mime_headers` field to be set for the message; + /// this is the case at least when `Message.has_html()` returns true + /// (we do not save raw mime unconditionally in the database to save space). 
+ /// The corresponding ffi-function is `dc_get_msg_html()`. + pub async fn get_html(self, context: &Context) -> String { + let rawmime: Option<String> = context + .sql + .query_get_value( + context, + "SELECT mime_headers FROM msgs WHERE id=?;", + paramsv![self], + ) + .await; + + if let Some(rawmime) = rawmime { + match HtmlMsgParser::from_bytes(context, rawmime.as_bytes()).await { + Err(err) => format!("parser error: {}", err), + Ok(parser) => parser.html, + } + } else { + format!("parser error: no mime for {}", self) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_utils::*; + + #[async_std::test] + async fn test_htmlparse_plain_unspecified() { + let t = TestContext::new().await; + let raw = include_bytes!("../test-data/message/text_plain_unspecified.eml"); + let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + assert_eq!( + parser.html, + r##"<!DOCTYPE html> +<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body> +This message does not have Content-Type nor Subject.<br/> +<br/> +</body></html> +"## + ); + } + + #[async_std::test] + async fn test_htmlparse_plain_iso88591() { + let t = TestContext::new().await; + let raw = include_bytes!("../test-data/message/text_plain_iso88591.eml"); + let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + assert_eq!( + parser.html, + r##"<!DOCTYPE html> +<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body> +message with a non-UTF-8 encoding: äöüßÄÖÜ<br/> +<br/> +</body></html> +"## + ); + } + + #[async_std::test] + async fn test_htmlparse_plain_flowed() { + let t = TestContext::new().await; + let raw = include_bytes!("../test-data/message/text_plain_flowed.eml"); + let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + assert!(parser.plain.unwrap().flowed); + assert_eq!( + parser.html, + r##"<!DOCTYPE html> +<html><head><meta http-equiv="Content-Type" content="text/html; 
charset=utf-8" /></head><body> +This line ends with a space and will be merged with the next one due to format=flowed.<br/> +<br/> +This line does not end with a space<br/> +and will be wrapped as usual.<br/> +<br/> +</body></html> +"## + ); + } + + #[async_std::test] + async fn test_htmlparse_alt_plain() { + let t = TestContext::new().await; + let raw = include_bytes!("../test-data/message/text_alt_plain.eml"); + let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + assert_eq!( + parser.html, + r##"<!DOCTYPE html> +<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body> +mime-modified should not be set set as there is no html and no special stuff;<br/> +although not being a delta-message.<br/> +test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x27; :)<br/> +<br/> +<br/> +</body></html> +"## + ); + } + + #[async_std::test] + async fn test_htmlparse_html() { + let t = TestContext::new().await; + let raw = include_bytes!("../test-data/message/text_html.eml"); + let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + + // on windows, `\r\n` linends are returned from mimeparser, + // however, rust multiline-strings use just `\n`; + // therefore, we just remove `\r` before comparison. 
+ assert_eq!( + parser.html.replace("\r", ""), + r##" +<html> + <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p> +</html>"## + ); + } + + #[async_std::test] + async fn test_htmlparse_alt_html() { + let t = TestContext::new().await; + let raw = include_bytes!("../test-data/message/text_alt_html.eml"); + let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + assert_eq!( + parser.html.replace("\r", ""), // see comment in test_htmlparse_html() + r##"<html> + <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p> +</html> + +"## + ); + } + + #[async_std::test] + async fn test_htmlparse_alt_plain_html() { + let t = TestContext::new().await; + let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml"); + let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + assert_eq!( + parser.html.replace("\r", ""), // see comment in test_htmlparse_html() + r##"<html> + <p> + this is <b>html</b> + </p> +</html> + +"## + ); + } + + #[async_std::test] + async fn test_htmlparse_apple_cid_jpg() { + // load raw mime html-data with related image-part (cid:) + // and make sure, Content-Id has angle-brackets that are removed correctly. 
+ let t = TestContext::new().await; + let raw = include_bytes!("../test-data/message/apple_cid_jpg.eml"); + let test = String::from_utf8_lossy(raw); + assert!(test + .find("Content-Id: <8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box>") + .is_some()); + assert!(test + .find("cid:8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box") + .is_some()); + assert!(test.find("data:").is_none()); + + // parsing converts cid: to data: + let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + assert!(parser.html.find("<html>").is_some()); + assert!(parser.html.find("Content-Id:").is_none()); + assert!(parser + .html + .find("data:image/jpeg;base64,/9j/4AAQ") + .is_some()); + assert!(parser.html.find("cid:").is_none()); + } +} diff --git a/src/lib.rs b/src/lib.rs index c21980b55..251891eaf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -76,6 +76,8 @@ pub mod stock; mod token; #[macro_use] mod dehtml; +pub mod html; +pub mod plaintext; pub mod dc_receive_imf; pub mod dc_tools; diff --git a/src/message.rs b/src/message.rs index d35a437d2..c62cfb24a 100644 --- a/src/message.rs +++ b/src/message.rs @@ -110,7 +110,7 @@ impl MsgId { context .sql .execute( - "UPDATE msgs SET chat_id=?, txt='', txt_raw='', from_id=0, to_id=0, param='' WHERE id=?", + "UPDATE msgs SET chat_id=?, txt='', txt_raw='', mime_headers='', from_id=0, to_id=0, param='' WHERE id=?", paramsv![chat_id, self], ) .await?; @@ -264,6 +264,7 @@ pub struct Message { pub(crate) server_folder: Option<String>, pub(crate) server_uid: u32, pub(crate) is_dc_message: MessengerMessage, + pub(crate) mime_modified: bool, pub(crate) chat_blocked: Blocked, pub(crate) location_id: u32, error: Option<String>, @@ -305,6 +306,7 @@ impl Message { " m.state AS state,", " m.error AS error,", " m.msgrmsg AS msgrmsg,", + " m.mime_modified AS mime_modified,", " m.txt AS txt,", " m.param AS param,", " m.hidden AS hidden,", @@ -335,6 +337,7 @@ impl Message { let error: String = row.get("error")?; msg.error = Some(error).filter(|error| 
!error.is_empty()); msg.is_dc_message = row.get("msgrmsg")?; + msg.mime_modified = row.get("mime_modified")?; let text; if let rusqlite::types::ValueRef::Text(buf) = row.get_raw("txt") { diff --git a/src/mimeparser.rs b/src/mimeparser.rs index 31e34b6b9..474fb6689 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -65,6 +65,10 @@ pub struct MimeMessage { pub(crate) group_avatar: Option<AvatarAction>, pub(crate) mdn_reports: Vec<Report>, pub(crate) failure_report: Option<FailureReport>, + + // if this flag is set, the parts/text/etc. are just close to the original mime-message; + // clients should offer a way to view the original message in this case + pub is_mime_modified: bool, } #[derive(Debug, PartialEq)] @@ -223,6 +227,7 @@ impl MimeMessage { user_avatar: None, group_avatar: None, failure_report: None, + is_mime_modified: false, }; parser.parse_mime_recursive(context, &mail).await?; parser.maybe_remove_bad_parts(); @@ -598,6 +603,12 @@ impl MimeMessage { } } } + if any_part_added && mail.subparts.len() > 1 { + // there are other alternative parts, likely HTML, + // so we might have missed some content on simplifying. + // set mime-modified to force the ui to display a show-message button. 
+ self.is_mime_modified = true; + } } (mime::MULTIPART, "encrypted") => { // we currently do not try to decrypt non-autocrypt messages @@ -727,20 +738,26 @@ impl MimeMessage { let mut dehtml_failed = false; - let (simplified_txt, is_forwarded, top_quote) = if decoded_data.is_empty() { - ("".to_string(), false, None) - } else { - let is_html = mime_type == mime::TEXT_HTML; - let out = if is_html { - dehtml(&decoded_data).unwrap_or_else(|| { - dehtml_failed = true; - decoded_data.clone() - }) + let (simplified_txt, is_forwarded, is_cut, top_quote) = + if decoded_data.is_empty() { + ("".to_string(), false, false, None) } else { - decoded_data.clone() + let is_html = mime_type == mime::TEXT_HTML; + let out = if is_html { + self.is_mime_modified = true; + dehtml(&decoded_data).unwrap_or_else(|| { + dehtml_failed = true; + decoded_data.clone() + }) + } else { + decoded_data.clone() + }; + simplify(out, self.has_chat_version()) }; - simplify(out, self.has_chat_version()) - }; + + self.is_mime_modified = self.is_mime_modified + || ((is_forwarded || is_cut || top_quote.is_some()) + && !self.has_chat_version()); let is_format_flowed = if let Some(format) = mail.ctype.params.get("format") { @@ -2562,4 +2579,67 @@ On 2020-10-25, Bob wrote: assert_eq!(msg.get_height(), 64); assert_eq!(msg.get_filemime().unwrap(), "image/png"); } + + #[async_std::test] + async fn test_mime_modified_plain() { + let t = TestContext::new().await; + let raw = include_bytes!("../test-data/message/text_plain_unspecified.eml"); + let mimeparser = MimeMessage::from_bytes(&t.ctx, raw).await.unwrap(); + assert!(!mimeparser.is_mime_modified); + assert_eq!( + mimeparser.parts[0].msg, + "This message does not have Content-Type nor Subject." 
+ ); + } + + #[async_std::test] + async fn test_mime_modified_alt_plain_html() { + let t = TestContext::new().await; + let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml"); + let mimeparser = MimeMessage::from_bytes(&t.ctx, raw).await.unwrap(); + assert!(mimeparser.is_mime_modified); + assert_eq!( + mimeparser.parts[0].msg, + "mime-modified test – this is plain" + ); + } + + #[async_std::test] + async fn test_mime_modified_alt_plain() { + let t = TestContext::new().await; + let raw = include_bytes!("../test-data/message/text_alt_plain.eml"); + let mimeparser = MimeMessage::from_bytes(&t.ctx, raw).await.unwrap(); + assert!(!mimeparser.is_mime_modified); + assert_eq!( + mimeparser.parts[0].msg, + "mime-modified test – \ + mime-modified should not be set set as there is no html and no special stuff;\n\ + although not being a delta-message.\n\ + test some special html-characters as < > and & but also \" and ' :)" + ); + } + + #[async_std::test] + async fn test_mime_modified_alt_html() { + let t = TestContext::new().await; + let raw = include_bytes!("../test-data/message/text_alt_html.eml"); + let mimeparser = MimeMessage::from_bytes(&t.ctx, raw).await.unwrap(); + assert!(mimeparser.is_mime_modified); + assert_eq!( + mimeparser.parts[0].msg, + "mime-modified test – mime-modified *set*; simplify is always regarded as lossy." + ); + } + + #[async_std::test] + async fn test_mime_modified_html() { + let t = TestContext::new().await; + let raw = include_bytes!("../test-data/message/text_html.eml"); + let mimeparser = MimeMessage::from_bytes(&t.ctx, raw).await.unwrap(); + assert!(mimeparser.is_mime_modified); + assert_eq!( + mimeparser.parts[0].msg, + "mime-modified test – mime-modified *set*; simplify is always regarded as lossy." + ); + } } diff --git a/src/plaintext.rs b/src/plaintext.rs new file mode 100644 index 000000000..e78d3f85a --- /dev/null +++ b/src/plaintext.rs @@ -0,0 +1,252 @@ +//! Handle plain text together with some attributes. 
+use crate::simplify::split_lines; +use once_cell::sync::Lazy; + +#[derive(Debug)] +pub struct PlainText { + pub text: String, + + /// Text may be "flowed" as defined in [RFC 2646](https://tools.ietf.org/html/rfc2646). + /// At a glance, that means, if a line ends with a space, it is merged with the next one + /// and the first leading space is ignored + /// (to allow lines starting with `>` that normally indicates a quote) + pub flowed: bool, + + /// If set together with "flowed", + /// the space indicating merging two lines is removed. + pub delsp: bool, +} + +impl PlainText { + /// Convert plain text to HTML. + /// The function handles quotes, links, fixed and floating text paragraphs. + pub async fn to_html(&self) -> String { + static LINKIFY_MAIL_RE: Lazy<regex::Regex> = + Lazy::new(|| regex::Regex::new(r#"\b([\w.\-+]+@[\w.\-]+)\b"#).unwrap()); + + static LINKIFY_URL_RE: Lazy<regex::Regex> = Lazy::new(|| { + regex::Regex::new(r#"\b((http|https|ftp|ftps):[\w.,:;$/@!?&%\-~=#+]+)"#).unwrap() + }); + + let lines = split_lines(&self.text); + + let mut ret = + "<!DOCTYPE html>\n<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>\n".to_string(); + + for line in lines { + let is_quote = line.starts_with('>'); + + // we need to do html-entity-encoding after linkify, as otherwise encapsulated links + // as <http://example.org> cannot be handled correctly + // (they would become &lt;http://example.org&gt; where the trailing &gt; would become a valid url part). + // to avoid double encoding, we escape our html-entities by \r that must not be used in the string elsewhere. 
+ let line = line.to_string().replace("\r", ""); + + let mut line = LINKIFY_MAIL_RE + .replace_all(&*line, "\rLTa href=\rQUOTmailto:$1\rQUOT\rGT$1\rLT/a\rGT") + .as_ref() + .to_string(); + + line = LINKIFY_URL_RE + .replace_all(&*line, "\rLTa href=\rQUOT$1\rQUOT\rGT$1\rLT/a\rGT") + .as_ref() + .to_string(); + + // encode html-entities after linkify the raw string + line = escaper::encode_minimal(&line); + + // make our escaped html-entities real after encoding all others + line = line.replace("\rLT", "<"); + line = line.replace("\rGT", ">"); + line = line.replace("\rQUOT", "\""); + + if self.flowed { + // flowed text as of RFC 3676 - + // a leading space shall be removed + // and is only there to allow > at the beginning of a line that is no quote. + line = line.strip_prefix(" ").unwrap_or(&line).to_string(); + if is_quote { + line = "<em>".to_owned() + &line + "</em>"; + } + + // a trailing space indicates that the line can be merged with the next one; + // for sake of simplicity, we skip merging for quotes (quotes may be combined with + // delsp, so `> >` is different from `>>` etc. 
see RFC 3676 for details) + if line.ends_with(' ') && !is_quote { + if self.delsp { + line.pop(); + } + } else { + line += "<br/>\n"; + } + } else { + // normal, fixed text + if is_quote { + line = "<em>".to_owned() + &line + "</em>"; + } + line += "<br/>\n"; + } + + ret += &*line; + } + ret += "</body></html>\n"; + ret + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[async_std::test] + async fn test_plain_to_html() { + let html = PlainText { + text: r##"line 1 +line 2 +line with https://link-mid-of-line.org and http://link-end-of-line.com/file?foo=bar%20 +http://link-at-start-of-line.org +"## + .to_string(), + flowed: false, + delsp: false, + } + .to_html() + .await; + assert_eq!( + html, + r##"<!DOCTYPE html> +<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body> +line 1<br/> +line 2<br/> +line with <a href="https://link-mid-of-line.org">https://link-mid-of-line.org</a> and <a href="http://link-end-of-line.com/file?foo=bar%20">http://link-end-of-line.com/file?foo=bar%20</a><br/> +<a href="http://link-at-start-of-line.org">http://link-at-start-of-line.org</a><br/> +<br/> +</body></html> +"## + ); + } + + #[async_std::test] + async fn test_plain_to_html_encapsulated() { + let html = PlainText { + text: r#"line with <http://encapsulated.link/?foo=_bar> here!"#.to_string(), + flowed: false, + delsp: false, + } + .to_html() + .await; + assert_eq!( + html, + r#"<!DOCTYPE html> +<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body> +line with &lt;<a href="http://encapsulated.link/?foo=_bar">http://encapsulated.link/?foo=_bar</a>&gt; here!<br/> +</body></html> +"# + ); + } + + #[async_std::test] + async fn test_plain_to_html_nolink() { + let html = PlainText { + text: r#"line with nohttp://no.link here"#.to_string(), + flowed: false, + delsp: false, + } + .to_html() + .await; + assert_eq!( + html, + r#"<!DOCTYPE html> +<html><head><meta http-equiv="Content-Type" content="text/html; 
charset=utf-8" /></head><body> +line with nohttp://no.link here<br/> +</body></html> +"# + ); + } + + #[async_std::test] + async fn test_plain_to_html_mailto() { + let html = PlainText { + text: r#"just an address: foo@bar.org another@one.de"#.to_string(), + flowed: false, + delsp: false, + } + .to_html() + .await; + assert_eq!( + html, + r#"<!DOCTYPE html> +<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body> +just an address: <a href="mailto:foo@bar.org">foo@bar.org</a> <a href="mailto:another@one.de">another@one.de</a><br/> +</body></html> +"# + ); + } + + #[async_std::test] + async fn test_plain_to_html_flowed() { + let html = PlainText { + text: "line \nstill line\n>quote \n>still quote\n >no quote".to_string(), + flowed: true, + delsp: false, + } + .to_html() + .await; + assert_eq!( + html, + r#"<!DOCTYPE html> +<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body> +line still line<br/> +<em>&gt;quote </em><br/> +<em>&gt;still quote</em><br/> +&gt;no quote<br/> +</body></html> +"# + ); + } + + #[async_std::test] + async fn test_plain_to_html_flowed_delsp() { + let html = PlainText { + text: "line \nstill line\n>quote \n>still quote\n >no quote".to_string(), + flowed: true, + delsp: true, + } + .to_html() + .await; + assert_eq!( + html, + r#"<!DOCTYPE html> +<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body> +linestill line<br/> +<em>&gt;quote </em><br/> +<em>&gt;still quote</em><br/> +&gt;no quote<br/> +</body></html> +"# + ); + } + + #[async_std::test] + async fn test_plain_to_html_fixed() { + let html = PlainText { + text: "line \nstill line\n>quote \n>still quote\n >no quote".to_string(), + flowed: false, + delsp: false, + } + .to_html() + .await; + assert_eq!( + html, + r#"<!DOCTYPE html> +<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body> +line <br/> +still line<br/> +<em>&gt;quote 
</em><br/> +<em>&gt;still quote</em><br/> + &gt;no quote<br/> +</body></html> +"# + ); + } +} diff --git a/src/simplify.rs b/src/simplify.rs index 6c46414e1..bf6f75cba 100644 --- a/src/simplify.rs +++ b/src/simplify.rs @@ -17,13 +17,16 @@ pub fn escape_message_footer_marks(text: &str) -> String { } /// Remove standard (RFC 3676, §4.3) footer if it is found. +/// Returns `(lines, is_footer_removed)` tuple; +/// `is_footer_removed` is set to `true` if the footer was actually removed from `lines` +/// (which is equal to the input array otherwise). #[allow(clippy::indexing_slicing)] -fn remove_message_footer<'a>(lines: &'a [&str]) -> &'a [&'a str] { +fn remove_message_footer<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) { let mut nearly_standard_footer = None; for (ix, &line) in lines.iter().enumerate() { match line { // some providers encode `-- ` to `-- =20` which results in `-- ` - "-- " | "-- " => return &lines[..ix], + "-- " | "-- " => return (&lines[..ix], true), // some providers encode `-- ` to `=2D-` which results in only `--`; // use that only when no other footer is found // and if the line before is empty and the line after is not empty @@ -37,13 +40,15 @@ fn remove_message_footer<'a>(lines: &'a [&str]) -> &'a [&'a str] { } } if let Some(ix) = nearly_standard_footer { - return &lines[..ix]; + return (&lines[..ix], true); } - lines + (lines, false) } -/// Remove nonstandard footer and a boolean indicating whether such -/// footer was removed. +/// Remove nonstandard footer and a boolean indicating whether such footer was removed. +/// Returns `(lines, is_footer_removed)` tuple; +/// `is_footer_removed` is set to `true` if the footer was actually removed from `lines` +/// (which is equal to the input array otherwise). 
#[allow(clippy::indexing_slicing)] fn remove_nonstandard_footer<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) { for (ix, &line) in lines.iter().enumerate() { @@ -60,20 +65,25 @@ fn remove_nonstandard_footer<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) { (lines, false) } -fn split_lines(buf: &str) -> Vec<&str> { +pub(crate) fn split_lines(buf: &str) -> Vec<&str> { buf.split('\n').collect() } /// Simplify message text for chat display. /// Remove quotes, signatures, trailing empty lines etc. -pub fn simplify(mut input: String, is_chat_message: bool) -> (String, bool, Option<String>) { +/// Returns `(text, is_forwarded, is_cut, quote)` tuple, +/// returning the simplified text and some additional information gained from the input. +pub fn simplify(mut input: String, is_chat_message: bool) -> (String, bool, bool, Option<String>) { + let mut is_cut = false; + input.retain(|c| c != '\r'); let lines = split_lines(&input); let (lines, is_forwarded) = skip_forward_header(&lines); let (lines, mut top_quote) = remove_top_quote(lines); let original_lines = &lines; - let lines = remove_message_footer(lines); + let (lines, footer_removed) = remove_message_footer(lines); + is_cut = is_cut || footer_removed; let text = if is_chat_message { render_message(lines, false) @@ -88,6 +98,7 @@ pub fn simplify(mut input: String, is_chat_message: bool) -> (String, bool, Opti if lines.iter().all(|it| it.trim().is_empty()) { render_message(original_lines, false) } else { + is_cut = is_cut || has_nonstandard_footer || bottom_quote.is_some(); render_message(lines, has_nonstandard_footer || bottom_quote.is_some()) } }; @@ -95,11 +106,13 @@ pub fn simplify(mut input: String, is_chat_message: bool) -> (String, bool, Opti if !is_chat_message { top_quote = top_quote.map(|quote| { let quote_lines = split_lines(&quote); - let quote_lines = remove_message_footer(&quote_lines); + let (quote_lines, footer_removed) = remove_message_footer(&quote_lines); + is_cut = is_cut || footer_removed; + 
render_message(quote_lines, false) }); } - (text, is_forwarded, top_quote) + (text, is_forwarded, is_cut, top_quote) } /// Skips "forwarded message" header. @@ -254,7 +267,7 @@ mod tests { #[test] // proptest does not support [[:graphical:][:space:]] regex. fn test_simplify_plain_text_fuzzy(input in "[!-~\t \n]+") { - let (output, _is_forwarded, _) = simplify(input, true); + let (output, _is_forwarded, _, _) = simplify(input, true); assert!(output.split('\n').all(|s| s != "-- ")); } } @@ -262,38 +275,42 @@ mod tests { #[test] fn test_dont_remove_whole_message() { let input = "\n------\nFailed\n------\n\nUh-oh, this workflow did not succeed!\n\nlots of other text".to_string(); - let (plain, is_forwarded, _) = simplify(input, false); + let (plain, is_forwarded, is_cut, _) = simplify(input, false); assert_eq!( plain, "------\nFailed\n------\n\nUh-oh, this workflow did not succeed!\n\nlots of other text" ); assert!(!is_forwarded); + assert!(!is_cut); } #[test] fn test_chat_message() { let input = "Hi! How are you?\n\n---\n\nI am good.\n-- \nSent with my Delta Chat Messenger: https://delta.chat".to_string(); - let (plain, is_forwarded, _) = simplify(input, true); + let (plain, is_forwarded, is_cut, _) = simplify(input, true); assert_eq!(plain, "Hi! 
How are you?\n\n---\n\nI am good."); assert!(!is_forwarded); + assert!(is_cut); } #[test] fn test_simplify_trim() { let input = "line1\n\r\r\rline2".to_string(); - let (plain, is_forwarded, _) = simplify(input, false); + let (plain, is_forwarded, is_cut, _) = simplify(input, false); assert_eq!(plain, "line1\nline2"); assert!(!is_forwarded); + assert!(!is_cut); } #[test] fn test_simplify_forwarded_message() { let input = "---------- Forwarded message ----------\r\nFrom: test@example.com\r\n\r\nForwarded message\r\n-- \r\nSignature goes here".to_string(); - let (plain, is_forwarded, _) = simplify(input, false); + let (plain, is_forwarded, is_cut, _) = simplify(input, false); assert_eq!(plain, "Forwarded message"); assert!(is_forwarded); + assert!(is_cut); } #[test] @@ -335,41 +352,50 @@ mod tests { #[test] fn test_remove_message_footer() { let input = "text\n--\nno footer".to_string(); - let (plain, _, _) = simplify(input, true); + let (plain, _, is_cut, _) = simplify(input, true); assert_eq!(plain, "text\n--\nno footer"); + assert!(!is_cut); let input = "text\n\n--\n\nno footer".to_string(); - let (plain, _, _) = simplify(input, true); + let (plain, _, is_cut, _) = simplify(input, true); assert_eq!(plain, "text\n\n--\n\nno footer"); + assert!(!is_cut); let input = "text\n\n-- no footer\n\n".to_string(); - let (plain, _, _) = simplify(input, true); + let (plain, _, _, _) = simplify(input, true); assert_eq!(plain, "text\n\n-- no footer"); let input = "text\n\n--\nno footer\n-- \nfooter".to_string(); - let (plain, _, _) = simplify(input, true); + let (plain, _, is_cut, _) = simplify(input, true); assert_eq!(plain, "text\n\n--\nno footer"); + assert!(is_cut); let input = "text\n\n--\ntreated as footer when unescaped".to_string(); - let (plain, _, _) = simplify(input.clone(), true); + let (plain, _, is_cut, _) = simplify(input.clone(), true); assert_eq!(plain, "text"); // see remove_message_footer() for some explanations + assert!(is_cut); let escaped = 
escape_message_footer_marks(&input); - let (plain, _, _) = simplify(escaped, true); + let (plain, _, is_cut, _) = simplify(escaped, true); assert_eq!(plain, "text\n\n--\ntreated as footer when unescaped"); + assert!(!is_cut); // Nonstandard footer sent by https://siju.es/ let input = "Message text here\n---Desde mi teléfono con SIJÚ\n\nQuote here".to_string(); - let (plain, _, _) = simplify(input.clone(), false); + let (plain, _, is_cut, _) = simplify(input.clone(), false); assert_eq!(plain, "Message text here [...]"); - let (plain, _, _) = simplify(input.clone(), true); + assert!(is_cut); + let (plain, _, is_cut, _) = simplify(input.clone(), true); assert_eq!(plain, input); + assert!(!is_cut); let input = "--\ntreated as footer when unescaped".to_string(); - let (plain, _, _) = simplify(input.clone(), true); + let (plain, _, is_cut, _) = simplify(input.clone(), true); assert_eq!(plain, ""); // see remove_message_footer() for some explanations + assert!(is_cut); let escaped = escape_message_footer_marks(&input); - let (plain, _, _) = simplify(escaped, true); + let (plain, _, is_cut, _) = simplify(escaped, true); assert_eq!(plain, "--\ntreated as footer when unescaped"); + assert!(!is_cut); } } diff --git a/src/sql.rs b/src/sql.rs index 4c4fe98c5..b9c16078b 100644 --- a/src/sql.rs +++ b/src/sql.rs @@ -224,6 +224,31 @@ impl Sql { .await } + /// Check if a column exists in a given table. 
+ pub async fn col_exists( + &self, + table_name: impl AsRef<str>, + col_name: impl AsRef<str>, + ) -> Result<bool> { + let table_name = table_name.as_ref().to_string(); + let col_name = col_name.as_ref().to_string(); + self.with_conn(move |conn| { + let mut exists = false; + // `PRAGMA table_info` returns one row per column, + // each row containing 0=cid, 1=name, 2=type, 3=notnull, 4=dflt_value + conn.pragma(None, "table_info", &table_name, |row| { + let curr_name: String = row.get(1)?; + if col_name == curr_name { + exists = true; + } + Ok(()) + })?; + + Ok(exists) + }) + .await + } + /// Execute a query which is expected to return zero or one row. pub async fn query_row_optional<T, F>( &self, @@ -1413,6 +1438,17 @@ CREATE INDEX devmsglabels_index1 ON devmsglabels (label); sql.set_raw_config_int(context, "dbversion", 71).await?; } + if dbversion < 72 { + info!(context, "[migration] v72"); + if !sql.col_exists("msgs", "mime_modified").await? { + sql.execute( + "ALTER TABLE msgs ADD COLUMN mime_modified INTEGER DEFAULT 0;", + paramsv![], + ) + .await?; + } + sql.set_raw_config_int(context, "dbversion", 72).await?; + } // (2) updates that require high-level objects // (the structure is complete now and all objects are usable) @@ -1468,6 +1504,7 @@ async fn prune_tombstones(context: &Context) -> Result<()> { #[cfg(test)] mod test { use super::*; + use crate::test_utils::TestContext; #[test] fn test_maybe_add_file() { @@ -1494,4 +1531,19 @@ mod test { assert!(!is_file_in_use(&files, Some(".txt"), "hello")); assert!(is_file_in_use(&files, Some("-suffix"), "world.txt-suffix")); } + + #[async_std::test] + async fn test_table_exists() { + let t = TestContext::new().await; + assert!(t.ctx.sql.table_exists("msgs").await.unwrap()); + assert!(!t.ctx.sql.table_exists("foobar").await.unwrap()); + } + + #[async_std::test] + async fn test_col_exists() { + let t = TestContext::new().await; + assert!(t.ctx.sql.col_exists("msgs", "mime_modified").await.unwrap()); + 
assert!(!t.ctx.sql.col_exists("msgs", "foobar").await.unwrap()); + assert!(!t.ctx.sql.col_exists("foobar", "foobar").await.unwrap()); + } } diff --git a/test-data/message/apple_cid_jpg.eml b/test-data/message/apple_cid_jpg.eml new file mode 100644 index 000000000..322403ffb --- /dev/null +++ b/test-data/message/apple_cid_jpg.eml @@ -0,0 +1,53 @@ +From: =?utf-8?Q?Bj=C3=B6rn_Petersen?= <somewhere-apple@me.com> +Content-Type: multipart/alternative; + boundary="Apple-Mail=_19251BCB-E12B-423A-9553-5A68560C2AFD" +Mime-Version: 1.0 (Mac OS X Mail 13.4 \(3608.120.23.2.4\)) +Subject: a jpeg +Message-Id: <BC47DA72-6C78-443A-8EBF-2CD199ABAD09@me.com> +Date: Sat, 9 Jan 2021 00:36:11 +0100 +To: somewhere-nonapple@testrun.org +X-Mailer: Apple Mail (2.3608.120.23.2.4) + + +--Apple-Mail=_19251BCB-E12B-423A-9553-5A68560C2AFD +Content-Transfer-Encoding: 7bit +Content-Type: text/plain; + charset=us-ascii + + +a jpeg +--Apple-Mail=_19251BCB-E12B-423A-9553-5A68560C2AFD +Content-Type: multipart/related; + type="text/html"; + boundary="Apple-Mail=_4C3710FD-D75D-47FB-8D41-983220390856" + + +--Apple-Mail=_4C3710FD-D75D-47FB-8D41-983220390856 +Content-Transfer-Encoding: 7bit +Content-Type: text/html; + charset=us-ascii + +<html><head><meta http-equiv="Content-Type" content="text/html; charset=us-ascii"><base></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; line-break: after-white-space;" class=""><base class=""><div class="Apple-Mail-URLShareUserContentTopClass"><br class=""></div><div class="Apple-Mail-URLShareWrapperClass"><blockquote type="cite" style="border-left-style: none; color: inherit; padding: inherit; margin: inherit;" class="">a jpeg + <img apple-inline="yes" id="118F6150-5EF5-4DE8-917F-1851EC94FB7C" src="cid:8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box" class=""></blockquote></div></body></html> +--Apple-Mail=_4C3710FD-D75D-47FB-8D41-983220390856 +Content-Transfer-Encoding: base64 +Content-Disposition: inline; + filename=small.jpg +Content-Type: 
image/jpeg; + x-unix-mode=0666; + name="small.jpg" +Content-Id: <8AE052EF-BC90-486F-BB78-58D3590308EC@fritz.box> + +/9j/4AAQSkZJRgABAQEBLAEsAAD/2wBDABELDA8MChEPDg8TEhEUGSobGRcXGTMkJh4qPDU/Pjs1 +OjlDS2BRQ0daSDk6U3FUWmNma2xrQFB2fnRofWBpa2f/2wBDARITExkWGTEbGzFnRTpFZ2dnZ2dn +Z2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2f/wgARCAAIAAgDAREA +AhEBAxEB/8QAFAABAAAAAAAAAAAAAAAAAAAABP/EABUBAQEAAAAAAAAAAAAAAAAAAAMF/9oADAMB +AAIQAxAAAAF8s//EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEAAQUCf//EABQRAQAAAAAAAAAA +AAAAAAAAAAD/2gAIAQMBAT8Bf//EABQRAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQIBAT8Bf//EABQQ +AQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEABj8Cf//EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEA +AT8hf//aAAwDAQACAAMAAAAQ3//EABQRAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQMBAT8Qf//EABQR +AQAAAAAAAAAAAAAAAAAAAAD/2gAIAQIBAT8Qf//EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEA +AT8Qf//Z +--Apple-Mail=_4C3710FD-D75D-47FB-8D41-983220390856-- + +--Apple-Mail=_19251BCB-E12B-423A-9553-5A68560C2AFD-- diff --git a/test-data/message/text_alt_html.eml b/test-data/message/text_alt_html.eml new file mode 100644 index 000000000..dde1b9ed2 --- /dev/null +++ b/test-data/message/text_alt_html.eml @@ -0,0 +1,16 @@ +Subject: mime-modified test +Message-ID: 12345@testrun.org +Date: Sat, 07 Dec 2019 19:00:27 +0000 +To: recp@testrun.org +From: sender@testrun.org +Content-Type: multipart/alternative; boundary="==BREAK==" + + +--==BREAK== +Content-Type: text/html; charset=utf-8 + +<html> + <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p> +</html> + +--==BREAK==-- diff --git a/test-data/message/text_alt_plain.eml b/test-data/message/text_alt_plain.eml new file mode 100644 index 000000000..aeed9b886 --- /dev/null +++ b/test-data/message/text_alt_plain.eml @@ -0,0 +1,16 @@ +Subject: mime-modified test +Message-ID: 12345@testrun.org +Date: Sat, 07 Dec 2019 19:00:27 +0000 +To: recp@testrun.org +From: sender@testrun.org +Content-Type: multipart/alternative; boundary="==BREAK==" + + +--==BREAK== +Content-Type: text/plain; charset=utf-8 + 
+mime-modified should not be set set as there is no html and no special stuff; +although not being a delta-message. +test some special html-characters as < > and & but also " and ' :) + +--==BREAK==-- diff --git a/test-data/message/text_alt_plain_html.eml b/test-data/message/text_alt_plain_html.eml new file mode 100644 index 000000000..596499778 --- /dev/null +++ b/test-data/message/text_alt_plain_html.eml @@ -0,0 +1,23 @@ +Subject: mime-modified test +Message-ID: 12345@testrun.org +Date: Sat, 07 Dec 2019 19:00:27 +0000 +To: recp@testrun.org +From: sender@testrun.org +Content-Type: multipart/alternative; boundary="==BREAK==" + + +--==BREAK== +Content-Type: text/plain; charset=utf-8 + +this is plain + +--==BREAK== +Content-Type: text/html; charset=utf-8 + +<html> + <p> + this is <b>html</b> + </p> +</html> + +--==BREAK==-- diff --git a/test-data/message/text_html.eml b/test-data/message/text_html.eml new file mode 100644 index 000000000..93c667de4 --- /dev/null +++ b/test-data/message/text_html.eml @@ -0,0 +1,11 @@ +Subject: mime-modified test +Message-ID: 12345@testrun.org +Date: Sat, 07 Dec 2019 19:00:27 +0000 +To: recp@testrun.org +From: sender@testrun.org +Content-Type: text/html; charset=utf-8 + + +<html> + <p>mime-modified <b>set</b>; simplify is always regarded as lossy.</p> +</html> \ No newline at end of file diff --git a/test-data/message/text_plain_flowed.eml b/test-data/message/text_plain_flowed.eml new file mode 100644 index 000000000..7bb4235b3 --- /dev/null +++ b/test-data/message/text_plain_flowed.eml @@ -0,0 +1,11 @@ +Message-Id: <lkjsdf01u@example.org> +Date: Sat, 14 Sep 2019 19:00:13 +0200 +From: lmn <x@tux.org> +To: abc <abc@bcd.com> +Content-Type: text/plain; charset=utf-8; format=flowed + +This line ends with a space +and will be merged with the next one due to format=flowed. + +This line does not end with a space +and will be wrapped as usual. 
diff --git a/test-data/message/text_plain_iso88591.eml b/test-data/message/text_plain_iso88591.eml new file mode 100644 index 000000000..6432c91e6 --- /dev/null +++ b/test-data/message/text_plain_iso88591.eml @@ -0,0 +1,7 @@ +Message-Id: <lkjsdf01u@example.org> +Date: Sat, 14 Sep 2019 19:00:13 +0200 +From: lmn <x@tux.org> +To: abc <abc@bcd.com> +Content-Type: text/plain; charset=iso-8859-1 + +message with a non-UTF-8 encoding: diff --git a/test-data/message/text_plain_unspecified.eml b/test-data/message/text_plain_unspecified.eml new file mode 100644 index 000000000..cefdf8457 --- /dev/null +++ b/test-data/message/text_plain_unspecified.eml @@ -0,0 +1,6 @@ +Message-Id: <lkjsdf01u@example.org> +Date: Sat, 14 Sep 2019 19:00:13 +0200 +From: lmn <x@tux.org> +To: abc <abc@bcd.com> + +This message does not have Content-Type nor Subject.