add option to access original message (#2125)

* draft API to deal with uncut message texts

* add column mime_modified

* add mime_modified flag to MimeParser and save it in the database

* save mime_headers also when mime_modified is set

* cargo fmt

* set mime_modified on parsed html-texts and when there are multiple alternative-parts; add test for that

* prototype functions, add to repl and ffi

* use correct mime_modified flag

* basically parse Mime-Structure to HTML

* add basic tests for HTML-parsing

* convert text/plain to html for getting original

* respect charset for plain texts

* make test more specific

* fix handling non-utf-8 charsets for plain messages

* add test for plain_to_html()

* add failing test for plaintext linkify

* linkify urls in plain text

* fix regex

* plain text linkify: add failing test for encapsulated links as <https://domain.com>

* plain text linkify: make encapsulated links as <https://domain.com> work

* plain text linkify: require word boundary at beginning of link, add tests for that

* plain text linkify: linkify emails

* plain text: support format=flowed

* plain text: support quotes

* make clippy happy

* set mime-modified also when simplify() cuts non-html messages, add tests for that

* streamline mime recursion

* repl tool: write original html to file for further processing

* convert cid:- to data:-protocol

* add a test for cid: to data: conversion

* make clippy happy

* fix html-tests to work with windows-lineends

* clarify what the returned html-code may contain

* add some more detailed doc comments

* add mime_modified column only if not exist

this additional check is needed
as the column may added with another dbversion in
some shipped beta-versions.

* incorporate documentation suggestions from review

* rename get_original_mime_html() to more simple get_html()

* rename api is_mime_modified() to more simple has_html(); internally, mime_modified-flag stays as-is, however

* rename MimeS to MimeMultipartType

* do not set mime-modified flag for encrypted messages that need extra-handling for saved mime-structure

* fix typo

* move get_msg_html() to MsgId.get_html()

* incorporate more documentation suggestions from review

* remove unused return value from collect_texts_recursive()

* avoid mime_modified being mutable in write-parts-loop

* move 'use futures::future::FutureExt' atop of html.rs

* move attributes defining plain-text to a dedicated structure

* more PlainText to separate file

* escape cid when building regex

* let dc_get_msg_html() return NULL when calling with bad param
This commit is contained in:
bjoern
2021-01-11 17:40:35 +01:00
committed by GitHub
parent bb9e6038c4
commit e2688f6355
21 changed files with 1141 additions and 47 deletions

View File

@@ -65,6 +65,10 @@ pub struct MimeMessage {
pub(crate) group_avatar: Option<AvatarAction>,
pub(crate) mdn_reports: Vec<Report>,
pub(crate) failure_report: Option<FailureReport>,
// if this flag is set, the parts/text/etc. are just close to the original mime-message;
// clients should offer a way to view the original message in this case
pub is_mime_modified: bool,
}
#[derive(Debug, PartialEq)]
@@ -223,6 +227,7 @@ impl MimeMessage {
user_avatar: None,
group_avatar: None,
failure_report: None,
is_mime_modified: false,
};
parser.parse_mime_recursive(context, &mail).await?;
parser.maybe_remove_bad_parts();
@@ -598,6 +603,12 @@ impl MimeMessage {
}
}
}
if any_part_added && mail.subparts.len() > 1 {
// there are other alternative parts, likely HTML,
// so we might have missed some content on simplifying.
// set mime-modified to force the ui to display a show-message button.
self.is_mime_modified = true;
}
}
(mime::MULTIPART, "encrypted") => {
// we currently do not try to decrypt non-autocrypt messages
@@ -727,20 +738,26 @@ impl MimeMessage {
let mut dehtml_failed = false;
let (simplified_txt, is_forwarded, top_quote) = if decoded_data.is_empty() {
("".to_string(), false, None)
} else {
let is_html = mime_type == mime::TEXT_HTML;
let out = if is_html {
dehtml(&decoded_data).unwrap_or_else(|| {
dehtml_failed = true;
decoded_data.clone()
})
let (simplified_txt, is_forwarded, is_cut, top_quote) =
if decoded_data.is_empty() {
("".to_string(), false, false, None)
} else {
decoded_data.clone()
let is_html = mime_type == mime::TEXT_HTML;
let out = if is_html {
self.is_mime_modified = true;
dehtml(&decoded_data).unwrap_or_else(|| {
dehtml_failed = true;
decoded_data.clone()
})
} else {
decoded_data.clone()
};
simplify(out, self.has_chat_version())
};
simplify(out, self.has_chat_version())
};
self.is_mime_modified = self.is_mime_modified
|| ((is_forwarded || is_cut || top_quote.is_some())
&& !self.has_chat_version());
let is_format_flowed = if let Some(format) = mail.ctype.params.get("format")
{
@@ -2562,4 +2579,67 @@ On 2020-10-25, Bob wrote:
assert_eq!(msg.get_height(), 64);
assert_eq!(msg.get_filemime().unwrap(), "image/png");
}
#[async_std::test]
async fn test_mime_modified_plain() {
let t = TestContext::new().await;
let raw = include_bytes!("../test-data/message/text_plain_unspecified.eml");
let mimeparser = MimeMessage::from_bytes(&t.ctx, raw).await.unwrap();
assert!(!mimeparser.is_mime_modified);
assert_eq!(
mimeparser.parts[0].msg,
"This message does not have Content-Type nor Subject."
);
}
#[async_std::test]
async fn test_mime_modified_alt_plain_html() {
let t = TestContext::new().await;
let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml");
let mimeparser = MimeMessage::from_bytes(&t.ctx, raw).await.unwrap();
assert!(mimeparser.is_mime_modified);
assert_eq!(
mimeparser.parts[0].msg,
"mime-modified test this is plain"
);
}
#[async_std::test]
async fn test_mime_modified_alt_plain() {
let t = TestContext::new().await;
let raw = include_bytes!("../test-data/message/text_alt_plain.eml");
let mimeparser = MimeMessage::from_bytes(&t.ctx, raw).await.unwrap();
assert!(!mimeparser.is_mime_modified);
assert_eq!(
mimeparser.parts[0].msg,
"mime-modified test \
mime-modified should not be set set as there is no html and no special stuff;\n\
although not being a delta-message.\n\
test some special html-characters as < > and & but also \" and ' :)"
);
}
#[async_std::test]
async fn test_mime_modified_alt_html() {
let t = TestContext::new().await;
let raw = include_bytes!("../test-data/message/text_alt_html.eml");
let mimeparser = MimeMessage::from_bytes(&t.ctx, raw).await.unwrap();
assert!(mimeparser.is_mime_modified);
assert_eq!(
mimeparser.parts[0].msg,
"mime-modified test mime-modified *set*; simplify is always regarded as lossy."
);
}
#[async_std::test]
async fn test_mime_modified_html() {
let t = TestContext::new().await;
let raw = include_bytes!("../test-data/message/text_html.eml");
let mimeparser = MimeMessage::from_bytes(&t.ctx, raw).await.unwrap();
assert!(mimeparser.is_mime_modified);
assert_eq!(
mimeparser.parts[0].msg,
"mime-modified test mime-modified *set*; simplify is always regarded as lossy."
);
}
}