Don't show HTML if there is no content and there is a file attached

Fix https://github.com/deltachat/deltachat-core-rust/issues/1982
This commit is contained in:
Hocuri
2020-10-12 18:52:12 +02:00
committed by holger krekel
parent 9bf8799484
commit 4d2542cee5
2 changed files with 85 additions and 17 deletions

View File

@@ -20,18 +20,18 @@ enum AddText {
YesPreserveLineEnds,
}
// dehtml() returns way too many newlines; however, an optimisation on this issue is not needed as
// the newlines are typically removed in further processing by the caller
pub fn dehtml(buf: &str) -> String {
/// dehtml() returns way too many newlines; however, an optimisation on this issue is not needed as
/// the newlines are typically removed in further processing by the caller
pub fn dehtml(buf: &str) -> Option<String> {
let s = dehtml_quick_xml(buf);
if !s.trim().is_empty() {
return s;
return Some(s);
}
let s = dehtml_manually(buf);
if !s.trim().is_empty() {
return s;
return Some(s);
}
buf.to_string()
None
}
pub fn dehtml_quick_xml(buf: &str) -> String {
@@ -220,21 +220,23 @@ mod tests {
"<a href='https://get.delta.chat/'/>",
"[](https://get.delta.chat/)",
),
("", ""),
("<!doctype html>\n<b>fat text</b>", "*fat text*"),
// Invalid html (at least DC should show the text if the html is invalid):
("<!some invalid html code>\n<b>some text</b>", "some text"),
("<This text is in brackets>", "<This text is in brackets>"),
];
for (input, output) in cases {
assert_eq!(simplify(dehtml(input), true).0, output);
assert_eq!(simplify(dehtml(input).unwrap(), true).0, output);
}
let none_cases = vec!["<html> </html>", ""];
for input in none_cases {
assert_eq!(dehtml(input), None);
}
}
#[test]
fn test_dehtml_parse_br() {
let html = "\r\r\nline1<br>\r\n\r\n\r\rline2<br/>line3\n\r";
let plain = dehtml(html);
let plain = dehtml(html).unwrap();
assert_eq!(plain, "line1\n\r\r\rline2\nline3");
}
@@ -242,7 +244,7 @@ mod tests {
#[test]
fn test_dehtml_parse_href() {
let html = "<a href=url>text</a";
let plain = dehtml(html);
let plain = dehtml(html).unwrap();
assert_eq!(plain, "[text](url)");
}
@@ -250,7 +252,7 @@ mod tests {
#[test]
fn test_dehtml_bold_text() {
let html = "<!DOCTYPE name [<!DOCTYPE ...>]><!-- comment -->text <b><?php echo ... ?>bold</b><![CDATA[<>]]>";
let plain = dehtml(html);
let plain = dehtml(html).unwrap();
assert_eq!(plain, "text *bold*<>");
}
@@ -260,7 +262,7 @@ mod tests {
let html =
"&lt;&gt;&quot;&apos;&amp; &auml;&Auml;&ouml;&Ouml;&uuml;&Uuml;&szlig; foo&AElig;&ccedil;&Ccedil; &diams;&lrm;&rlm;&zwnj;&noent;&zwj;";
let plain = dehtml(html);
let plain = dehtml(html).unwrap();
assert_eq!(
plain,
@@ -283,7 +285,7 @@ mod tests {
</body>
</html>
"##;
let txt = dehtml(input);
let txt = dehtml(input).unwrap();
assert_eq!(txt.trim(), "lots of text");
}
}

View File

@@ -223,6 +223,7 @@ impl MimeMessage {
failure_report: None,
};
parser.parse_mime_recursive(context, &mail).await?;
parser.maybe_remove_bad_parts().await;
parser.heuristically_parse_ndn(context).await;
parser.parse_headers(context)?;
@@ -713,12 +714,17 @@ impl MimeMessage {
}
};
let mut dehtml_failed = false;
let (simplified_txt, is_forwarded, top_quote) = if decoded_data.is_empty() {
("".to_string(), false, None)
} else {
let is_html = mime_type == mime::TEXT_HTML;
let out = if is_html {
dehtml(&decoded_data)
dehtml(&decoded_data).unwrap_or_else(|| {
dehtml_failed = true;
decoded_data.clone()
})
} else {
decoded_data.clone()
};
@@ -750,6 +756,7 @@ impl MimeMessage {
if !simplified_txt.is_empty() || simplified_quote.is_some() {
let mut part = Part::default();
part.dehtlm_failed = dehtml_failed;
part.typ = Viewtype::Text;
part.mimetype = Some(mime_type);
part.msg = simplified_txt;
@@ -992,11 +999,21 @@ impl MimeMessage {
Ok(None)
}
async fn maybe_remove_bad_parts(&mut self) {
let good_parts = self.parts.iter().filter(|p| !p.dehtlm_failed).count();
if good_parts == 0 {
// We have no good part but show at least one bad part in order to show anything at all
self.parts.truncate(1);
} else if good_parts < self.parts.len() {
self.parts.retain(|p| !p.dehtlm_failed);
}
}
/// Some providers like GMX and Yahoo do not send standard NDNs (Non Delivery notifications).
/// If you improve heuristics here you might also have to change prefetch_should_download() in imap/mod.rs.
/// Also you should add a test in dc_receive_imf.rs (there already are lots of test_parse_ndn_* tests).
#[allow(clippy::indexing_slicing)]
async fn heuristically_parse_ndn(&mut self, context: &Context) -> Option<()> {
async fn heuristically_parse_ndn(&mut self, context: &Context) {
let maybe_ndn = if let Some(from) = self.get(HeaderDef::From_) {
let from = from.to_ascii_lowercase();
from.contains("mailer-daemon") || from.contains("mail-daemon")
@@ -1025,7 +1042,6 @@ impl MimeMessage {
}
}
}
None // Always return None, we just return anything so that we can use the '?' operator.
}
/// Handle reports
@@ -1194,6 +1210,7 @@ pub struct Part {
pub param: Params,
org_filename: Option<String>,
pub error: Option<String>,
dehtlm_failed: bool,
}
/// return mimetype and viewtype for a parsed mail
@@ -1875,6 +1892,55 @@ MDYyMDYxNTE1RTlDOEE4Cj4+CnN0YXJ0eHJlZgo4Mjc4CiUlRU9GCg==
assert_eq!(message.parts[0].msg, "Hello!");
}
#[async_std::test]
async fn test_hide_html_without_content() {
let t = TestContext::new().await;
let raw = br#"Date: Thu, 13 Feb 2020 22:41:20 +0000 (UTC)
From: sender@example.com
To: receiver@example.com
Subject: Mail with inline attachment
MIME-Version: 1.0
Content-Type: multipart/mixed;
boundary="----=_Part_25_46172632.1581201680436"
------=_Part_25_46172632.1581201680436
Content-Type: text/html; charset=utf-8
<head>
<meta http-equiv="Content-Type" content="text/html; charset=Windows-1252">
<meta name="GENERATOR" content="MSHTML 11.00.10570.1001"></head>
<body><img align="baseline" alt="" src="cid:1712254131-1" border="0" hspace="0">
</body>
------=_Part_25_46172632.1581201680436
Content-Type: application/pdf; name="some_pdf.pdf"
Content-Transfer-Encoding: base64
Content-Disposition: inline; filename="some_pdf.pdf"
JVBERi0xLjUKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURl
Y29kZT4+CnN0cmVhbQp4nGVOuwoCMRDs8xVbC8aZvC4Hx4Hno7ATAhZi56MTtPH33YtXiLKQ3ZnM
MDYyMDYxNTE1RTlDOEE4Cj4+CnN0YXJ0eHJlZgo4Mjc4CiUlRU9GCg==
------=_Part_25_46172632.1581201680436--
"#;
let message = MimeMessage::from_bytes(&t.ctx, &raw[..]).await.unwrap();
assert_eq!(message.parts.len(), 1);
assert_eq!(message.parts[0].typ, Viewtype::File);
assert_eq!(message.parts[0].msg, "");
// Make sure the file is there even though the html is wrong:
let param = &message.parts[0].param;
let blob: BlobObject = param
.get_blob(Param::File, &t.ctx, false)
.await
.unwrap()
.unwrap();
let f = async_std::fs::File::open(blob.to_abs_path()).await.unwrap();
let size = f.metadata().await.unwrap().len();
assert_eq!(size, 154);
}
#[async_std::test]
async fn parse_inline_image() {
let context = TestContext::new().await;