Fix #1804: remove <!doctype html> and accept invalid HTML

This fixes #1804 in two ways: First, it removes a <!doctype html> from
the start of the mail, if there is any.

Then, it parses the html itself it quick-xml fails, just stripping
everything between < and >.

Both of these would have fixed this specific issue.

Also, add tests for both fixes.
This commit is contained in:
Hocuri
2020-08-19 12:20:01 +02:00
committed by holger krekel
parent f1ec1a0765
commit 1a736ca6c3
3 changed files with 185 additions and 1 deletions

View File

@@ -2623,4 +2623,26 @@ mod tests {
);
assert_eq!(last_msg.from_id, DC_CONTACT_ID_INFO);
}
#[async_std::test]
async fn test_html_only_mail() {
let t = TestContext::new_alice().await;
t.ctx
.set_config(Config::ShowEmails, Some("2"))
.await
.unwrap();
dc_receive_imf(
&t.ctx,
include_bytes!("../test-data/message/wrong-html.eml"),
"INBOX",
0,
false,
)
.await
.unwrap();
let chats = Chatlist::try_load(&t.ctx, 0, None, None).await.unwrap();
let msg_id = chats.get_msg_id(0).unwrap();
let msg = Message::load_from_db(&t.ctx, msg_id).await.unwrap();
assert_eq!(msg.text.unwrap(), " Guten Abend, \n\n Lots of text \n\n text with Umlaut ä... \n\n MfG [...]");
}
}