diff --git a/src/dehtml.rs b/src/dehtml.rs index 152c01865..f80600c17 100644 --- a/src/dehtml.rs +++ b/src/dehtml.rs @@ -20,18 +20,18 @@ enum AddText { YesPreserveLineEnds, } -// dehtml() returns way too many newlines; however, an optimisation on this issue is not needed as -// the newlines are typically removed in further processing by the caller -pub fn dehtml(buf: &str) -> String { +/// dehtml() returns way too many newlines; however, an optimisation on this issue is not needed as +/// the newlines are typically removed in further processing by the caller +pub fn dehtml(buf: &str) -> Option { let s = dehtml_quick_xml(buf); if !s.trim().is_empty() { - return s; + return Some(s); } let s = dehtml_manually(buf); if !s.trim().is_empty() { - return s; + return Some(s); } - buf.to_string() + None } pub fn dehtml_quick_xml(buf: &str) -> String { @@ -220,21 +220,23 @@ mod tests { "", "[](https://get.delta.chat/)", ), - ("", ""), ("\nfat text", "*fat text*"), // Invalid html (at least DC should show the text if the html is invalid): ("\nsome text", "some text"), - ("", ""), ]; for (input, output) in cases { - assert_eq!(simplify(dehtml(input), true).0, output); + assert_eq!(simplify(dehtml(input).unwrap(), true).0, output); + } + let none_cases = vec![" ", ""]; + for input in none_cases { + assert_eq!(dehtml(input), None); } } #[test] fn test_dehtml_parse_br() { let html = "\r\r\nline1
\r\n\r\n\r\rline2
line3\n\r"; - let plain = dehtml(html); + let plain = dehtml(html).unwrap(); assert_eq!(plain, "line1\n\r\r\rline2\nline3"); } @@ -242,7 +244,7 @@ mod tests { #[test] fn test_dehtml_parse_href() { let html = "
text"); } @@ -260,7 +262,7 @@ mod tests { let html = "<>"'& äÄöÖüÜß fooÆçÇ ♦‎‏‌&noent;‍"; - let plain = dehtml(html); + let plain = dehtml(html).unwrap(); assert_eq!( plain, @@ -283,7 +285,7 @@ mod tests { "##; - let txt = dehtml(input); + let txt = dehtml(input).unwrap(); assert_eq!(txt.trim(), "lots of text"); } } diff --git a/src/mimeparser.rs b/src/mimeparser.rs index 362203705..f1deb1fd8 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -223,6 +223,7 @@ impl MimeMessage { failure_report: None, }; parser.parse_mime_recursive(context, &mail).await?; + parser.maybe_remove_bad_parts().await; parser.heuristically_parse_ndn(context).await; parser.parse_headers(context)?; @@ -713,12 +714,17 @@ impl MimeMessage { } }; + let mut dehtml_failed = false; + let (simplified_txt, is_forwarded, top_quote) = if decoded_data.is_empty() { ("".to_string(), false, None) } else { let is_html = mime_type == mime::TEXT_HTML; let out = if is_html { - dehtml(&decoded_data) + dehtml(&decoded_data).unwrap_or_else(|| { + dehtml_failed = true; + decoded_data.clone() + }) } else { decoded_data.clone() }; @@ -750,6 +756,7 @@ impl MimeMessage { if !simplified_txt.is_empty() || simplified_quote.is_some() { let mut part = Part::default(); + part.dehtlm_failed = dehtml_failed; part.typ = Viewtype::Text; part.mimetype = Some(mime_type); part.msg = simplified_txt; @@ -992,11 +999,21 @@ impl MimeMessage { Ok(None) } + async fn maybe_remove_bad_parts(&mut self) { + let good_parts = self.parts.iter().filter(|p| !p.dehtlm_failed).count(); + if good_parts == 0 { + // We have no good part but show at least one bad part in order to show anything at all + self.parts.truncate(1); + } else if good_parts < self.parts.len() { + self.parts.retain(|p| !p.dehtlm_failed); + } + } + /// Some providers like GMX and Yahoo do not send standard NDNs (Non Delivery notifications). /// If you improve heuristics here you might also have to change prefetch_should_download() in imap/mod.rs. /// Also you should add a test in dc_receive_imf.rs (there already are lots of test_parse_ndn_* tests). #[allow(clippy::indexing_slicing)] - async fn heuristically_parse_ndn(&mut self, context: &Context) -> Option<()> { + async fn heuristically_parse_ndn(&mut self, context: &Context) { let maybe_ndn = if let Some(from) = self.get(HeaderDef::From_) { let from = from.to_ascii_lowercase(); from.contains("mailer-daemon") || from.contains("mail-daemon") @@ -1025,7 +1042,6 @@ impl MimeMessage { } } } - None // Always return None, we just return anything so that we can use the '?' operator. } /// Handle reports @@ -1194,6 +1210,7 @@ pub struct Part { pub param: Params, org_filename: Option, pub error: Option, + dehtlm_failed: bool, } /// return mimetype and viewtype for a parsed mail @@ -1875,6 +1892,55 @@ MDYyMDYxNTE1RTlDOEE4Cj4+CnN0YXJ0eHJlZgo4Mjc4CiUlRU9GCg== assert_eq!(message.parts[0].msg, "Hello!"); } + #[async_std::test] + async fn test_hide_html_without_content() { + let t = TestContext::new().await; + let raw = br#"Date: Thu, 13 Feb 2020 22:41:20 +0000 (UTC) +From: sender@example.com +To: receiver@example.com +Subject: Mail with inline attachment +MIME-Version: 1.0 +Content-Type: multipart/mixed; + boundary="----=_Part_25_46172632.1581201680436" + +------=_Part_25_46172632.1581201680436 +Content-Type: text/html; charset=utf-8 + + + + + + + +------=_Part_25_46172632.1581201680436 +Content-Type: application/pdf; name="some_pdf.pdf" +Content-Transfer-Encoding: base64 +Content-Disposition: inline; filename="some_pdf.pdf" + +JVBERi0xLjUKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURl +Y29kZT4+CnN0cmVhbQp4nGVOuwoCMRDs8xVbC8aZvC4Hx4Hno7ATAhZi56MTtPH33YtXiLKQ3ZnM +MDYyMDYxNTE1RTlDOEE4Cj4+CnN0YXJ0eHJlZgo4Mjc4CiUlRU9GCg== +------=_Part_25_46172632.1581201680436-- +"#; + + let message = MimeMessage::from_bytes(&t.ctx, &raw[..]).await.unwrap(); + + assert_eq!(message.parts.len(), 1); + assert_eq!(message.parts[0].typ, Viewtype::File); + assert_eq!(message.parts[0].msg, ""); + + // Make sure the file is there even though the html is wrong: + let param = &message.parts[0].param; + let blob: BlobObject = param + .get_blob(Param::File, &t.ctx, false) + .await + .unwrap() + .unwrap(); + let f = async_std::fs::File::open(blob.to_abs_path()).await.unwrap(); + let size = f.metadata().await.unwrap().len(); + assert_eq!(size, 154); + } + #[async_std::test] async fn parse_inline_image() { let context = TestContext::new().await;