add option to access original message (#2125)

* draft API to deal with uncut message texts

* add column mime_modified

* add mime_modified flag to MimeParser and save it in the database

* save mime_headers also when mime_modified is set

* cargo fmt

* set mime_modified on parsed html-texts and when there are multiple alternative-parts; add test for that

* prototype functions, add to repl and ffi

* use correct mime_modified flag

* basically parse Mime-Structure to HTML

* add basic tests for HTML-parsing

* convert text/plain to html for getting original

* respect charset for plain texts

* make test more specific

* fix handling non-utf-8 charsets for plain messages

* add test for plain_to_html()

* add failing test for plaintext linkify

* linkify urls in plain text

* fix regex

* plain text linkify: add failing test for encapsulated links as <https://domain.com>

* plain text linkify: make encapsulated links as <https://domain.com> work

* plain text linkify: require word boundary at beginning of link, add tests for that

* plain text linkify: linkify emails

* plain text: support format=flowed

* plain text: support quotes

* make clippy happy

* set mime-modified also when simplify() cuts non-html messages, add tests for that

* streamline mime recursion

* repl tool: write original html to file for further processing

* convert cid:- to data:-protocol

* add a test for cid: to data: conversion

* make clippy happy

* fix html-tests to work with windows-lineends

* clarify what the returned html-code may contain

* add some more detailed doc comments

* add mime_modified column only if not exist

this additional check is needed
as the column may have been added with another dbversion in
some shipped beta-versions.

* incorporate documentation suggestions from review

* rename get_original_mime_html() to the simpler get_html()

* rename api is_mime_modified() to the simpler has_html(); internally, however, the mime_modified flag stays as-is

* rename MimeS to MimeMultipartType

* do not set mime-modified flag for encrypted messages that need extra-handling for saved mime-structure

* fix typo

* move get_msg_html() to MsgId.get_html()

* incorporate more documentation suggestions from review

* remove unused return value from collect_texts_recursive()

* avoid mime_modified being mutable in write-parts-loop

* move 'use futures::future::FutureExt' to the top of html.rs

* move attributes defining plain-text to a dedicated structure

* move PlainText to separate file

* escape cid when building regex

* let dc_get_msg_html() return NULL when calling with bad param
This commit is contained in:
bjoern
2021-01-11 17:40:35 +01:00
committed by GitHub
parent bb9e6038c4
commit e2688f6355
21 changed files with 1141 additions and 47 deletions

View File

@@ -17,13 +17,16 @@ pub fn escape_message_footer_marks(text: &str) -> String {
}
/// Remove standard (RFC 3676, §4.3) footer if it is found.
/// Returns `(lines, is_footer_removed)` tuple;
/// `is_footer_removed` is set to `true` if the footer was actually removed from `lines`
/// (which is equal to the input array otherwise).
#[allow(clippy::indexing_slicing)]
fn remove_message_footer<'a>(lines: &'a [&str]) -> &'a [&'a str] {
fn remove_message_footer<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
let mut nearly_standard_footer = None;
for (ix, &line) in lines.iter().enumerate() {
match line {
// some providers encode `-- ` to `-- =20` which results in `--  ` (two trailing spaces)
"-- " | "-- " => return &lines[..ix],
"-- " | "-- " => return (&lines[..ix], true),
// some providers encode `-- ` to `=2D-` which results in only `--`;
// use that only when no other footer is found
// and if the line before is empty and the line after is not empty
@@ -37,13 +40,15 @@ fn remove_message_footer<'a>(lines: &'a [&str]) -> &'a [&'a str] {
}
}
if let Some(ix) = nearly_standard_footer {
return &lines[..ix];
return (&lines[..ix], true);
}
lines
(lines, false)
}
/// Remove nonstandard footer and a boolean indicating whether such
/// footer was removed.
/// Remove nonstandard footer if it is found.
/// Returns `(lines, is_footer_removed)` tuple;
/// `is_footer_removed` is set to `true` if the footer was actually removed from `lines`
/// (which is equal to the input array otherwise).
#[allow(clippy::indexing_slicing)]
fn remove_nonstandard_footer<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
for (ix, &line) in lines.iter().enumerate() {
@@ -60,20 +65,25 @@ fn remove_nonstandard_footer<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
(lines, false)
}
fn split_lines(buf: &str) -> Vec<&str> {
pub(crate) fn split_lines(buf: &str) -> Vec<&str> {
buf.split('\n').collect()
}
/// Simplify message text for chat display.
/// Remove quotes, signatures, trailing empty lines etc.
pub fn simplify(mut input: String, is_chat_message: bool) -> (String, bool, Option<String>) {
/// Returns `(text, is_forwarded, is_cut, quote)` tuple,
/// returning the simplified text and some additional information gained from the input.
pub fn simplify(mut input: String, is_chat_message: bool) -> (String, bool, bool, Option<String>) {
let mut is_cut = false;
input.retain(|c| c != '\r');
let lines = split_lines(&input);
let (lines, is_forwarded) = skip_forward_header(&lines);
let (lines, mut top_quote) = remove_top_quote(lines);
let original_lines = &lines;
let lines = remove_message_footer(lines);
let (lines, footer_removed) = remove_message_footer(lines);
is_cut = is_cut || footer_removed;
let text = if is_chat_message {
render_message(lines, false)
@@ -88,6 +98,7 @@ pub fn simplify(mut input: String, is_chat_message: bool) -> (String, bool, Opti
if lines.iter().all(|it| it.trim().is_empty()) {
render_message(original_lines, false)
} else {
is_cut = is_cut || has_nonstandard_footer || bottom_quote.is_some();
render_message(lines, has_nonstandard_footer || bottom_quote.is_some())
}
};
@@ -95,11 +106,13 @@ pub fn simplify(mut input: String, is_chat_message: bool) -> (String, bool, Opti
if !is_chat_message {
top_quote = top_quote.map(|quote| {
let quote_lines = split_lines(&quote);
let quote_lines = remove_message_footer(&quote_lines);
let (quote_lines, footer_removed) = remove_message_footer(&quote_lines);
is_cut = is_cut || footer_removed;
render_message(quote_lines, false)
});
}
(text, is_forwarded, top_quote)
(text, is_forwarded, is_cut, top_quote)
}
/// Skips "forwarded message" header.
@@ -254,7 +267,7 @@ mod tests {
#[test]
// proptest does not support [[:graphical:][:space:]] regex.
fn test_simplify_plain_text_fuzzy(input in "[!-~\t \n]+") {
let (output, _is_forwarded, _) = simplify(input, true);
let (output, _is_forwarded, _, _) = simplify(input, true);
assert!(output.split('\n').all(|s| s != "-- "));
}
}
@@ -262,38 +275,42 @@ mod tests {
#[test]
fn test_dont_remove_whole_message() {
let input = "\n------\nFailed\n------\n\nUh-oh, this workflow did not succeed!\n\nlots of other text".to_string();
let (plain, is_forwarded, _) = simplify(input, false);
let (plain, is_forwarded, is_cut, _) = simplify(input, false);
assert_eq!(
plain,
"------\nFailed\n------\n\nUh-oh, this workflow did not succeed!\n\nlots of other text"
);
assert!(!is_forwarded);
assert!(!is_cut);
}
#[test]
fn test_chat_message() {
let input = "Hi! How are you?\n\n---\n\nI am good.\n-- \nSent with my Delta Chat Messenger: https://delta.chat".to_string();
let (plain, is_forwarded, _) = simplify(input, true);
let (plain, is_forwarded, is_cut, _) = simplify(input, true);
assert_eq!(plain, "Hi! How are you?\n\n---\n\nI am good.");
assert!(!is_forwarded);
assert!(is_cut);
}
#[test]
fn test_simplify_trim() {
let input = "line1\n\r\r\rline2".to_string();
let (plain, is_forwarded, _) = simplify(input, false);
let (plain, is_forwarded, is_cut, _) = simplify(input, false);
assert_eq!(plain, "line1\nline2");
assert!(!is_forwarded);
assert!(!is_cut);
}
#[test]
fn test_simplify_forwarded_message() {
let input = "---------- Forwarded message ----------\r\nFrom: test@example.com\r\n\r\nForwarded message\r\n-- \r\nSignature goes here".to_string();
let (plain, is_forwarded, _) = simplify(input, false);
let (plain, is_forwarded, is_cut, _) = simplify(input, false);
assert_eq!(plain, "Forwarded message");
assert!(is_forwarded);
assert!(is_cut);
}
#[test]
@@ -335,41 +352,50 @@ mod tests {
#[test]
fn test_remove_message_footer() {
let input = "text\n--\nno footer".to_string();
let (plain, _, _) = simplify(input, true);
let (plain, _, is_cut, _) = simplify(input, true);
assert_eq!(plain, "text\n--\nno footer");
assert!(!is_cut);
let input = "text\n\n--\n\nno footer".to_string();
let (plain, _, _) = simplify(input, true);
let (plain, _, is_cut, _) = simplify(input, true);
assert_eq!(plain, "text\n\n--\n\nno footer");
assert!(!is_cut);
let input = "text\n\n-- no footer\n\n".to_string();
let (plain, _, _) = simplify(input, true);
let (plain, _, _, _) = simplify(input, true);
assert_eq!(plain, "text\n\n-- no footer");
let input = "text\n\n--\nno footer\n-- \nfooter".to_string();
let (plain, _, _) = simplify(input, true);
let (plain, _, is_cut, _) = simplify(input, true);
assert_eq!(plain, "text\n\n--\nno footer");
assert!(is_cut);
let input = "text\n\n--\ntreated as footer when unescaped".to_string();
let (plain, _, _) = simplify(input.clone(), true);
let (plain, _, is_cut, _) = simplify(input.clone(), true);
assert_eq!(plain, "text"); // see remove_message_footer() for some explanations
assert!(is_cut);
let escaped = escape_message_footer_marks(&input);
let (plain, _, _) = simplify(escaped, true);
let (plain, _, is_cut, _) = simplify(escaped, true);
assert_eq!(plain, "text\n\n--\ntreated as footer when unescaped");
assert!(!is_cut);
// Nonstandard footer sent by https://siju.es/
let input = "Message text here\n---Desde mi teléfono con SIJÚ\n\nQuote here".to_string();
let (plain, _, _) = simplify(input.clone(), false);
let (plain, _, is_cut, _) = simplify(input.clone(), false);
assert_eq!(plain, "Message text here [...]");
let (plain, _, _) = simplify(input.clone(), true);
assert!(is_cut);
let (plain, _, is_cut, _) = simplify(input.clone(), true);
assert_eq!(plain, input);
assert!(!is_cut);
let input = "--\ntreated as footer when unescaped".to_string();
let (plain, _, _) = simplify(input.clone(), true);
let (plain, _, is_cut, _) = simplify(input.clone(), true);
assert_eq!(plain, ""); // see remove_message_footer() for some explanations
assert!(is_cut);
let escaped = escape_message_footer_marks(&input);
let (plain, _, _) = simplify(escaped, true);
let (plain, _, is_cut, _) = simplify(escaped, true);
assert_eq!(plain, "--\ntreated as footer when unescaped");
assert!(!is_cut);
}
}